From f20dbc33ae6d504b65c9b74c3960af9d24ea10c8 Mon Sep 17 00:00:00 2001 From: flaming archer <2011xuesong@gmail.com> Date: Sat, 10 Dec 2022 09:22:43 +0800 Subject: [PATCH 001/334] Fix fuse small doc ### What changes are proposed in this pull request? Small doc fix. be able see -> be able to see pr-link: Alluxio/alluxio#16629 change-id: cid-4c9ce148b619138e2be115e3a99360a569d8a16a --- docs/en/contributor/Contributor-Tools.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/contributor/Contributor-Tools.md b/docs/en/contributor/Contributor-Tools.md index 27b5366d4f99..2ad66f4fe41d 100644 --- a/docs/en/contributor/Contributor-Tools.md +++ b/docs/en/contributor/Contributor-Tools.md @@ -158,7 +158,7 @@ action from the `Navigate > Search Everywhere` dialog. $ ls /tmp/alluxio-fuse $ bin/alluxio fs ls / ``` - You should be able see the file is created and listed by both `ls` commands. + You should be able to see the file is created and listed by both `ls` commands. ##### Starting multiple processes in IntelliJ at once IntelliJ is capable of creating groups of processes that all be launched simultaneously. To do so go to From 261ee99caf706551b0b3dd49250a443a924b7116 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Sat, 10 Dec 2022 09:23:50 +0800 Subject: [PATCH 002/334] Remove unused Name#MASTER_EMBEDDED_JOURNAL_PROXY_HOST ### What changes are proposed in this pull request? In the PropertyKey file, Name#MASTER_EMBEDDED_JOURNAL_PROXY_HOST is not used anywhere and should be removed. Details: https://github.com/Alluxio/alluxio/issues/16624 ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? For users, there is no impact. 
pr-link: Alluxio/alluxio#16625 change-id: cid-4340b03f36ff5c427afe8aaff00a2771c5dd1678 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 90ca0fb97fb2..9d7265ab2bef 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7567,8 +7567,6 @@ public static final class Name { "alluxio.master.journal.tailer.sleep.time"; private static final String MASTER_JOURNAL_UFS_OPTION = "alluxio.master.journal.ufs.option"; public static final String MASTER_RPC_ADDRESSES = "alluxio.master.rpc.addresses"; - public static final String MASTER_EMBEDDED_JOURNAL_PROXY_HOST = - "alluxio.master.embedded.journal.bind.host"; public static final String MASTER_EMBEDDED_JOURNAL_ADDRESSES = "alluxio.master.embedded.journal.addresses"; public static final String MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT = From 9ba7998f314a19f702a8a95d2e5ca7600f031863 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Sat, 10 Dec 2022 17:06:40 +0800 Subject: [PATCH 003/334] Add more cors config and make cors handle all http request ### What changes are proposed in this pull request? Add more cors config and make cors handle all http request. ### Why are the changes needed? Releated to #14490 I have added a new api and need to let cors handle all http requests. Also provides a more detailed configuration of cors. FYI https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS#the_http_response_headers ### Does this PR introduce any user facing changes? add some cors config. 
pr-link: Alluxio/alluxio#14562 change-id: cid-fda642e259ecc2de06ce90356037e351b6f822fa --- .../main/java/alluxio/conf/PropertyKey.java | 50 ++++++++++++++++ .../src/main/java/alluxio/RestUtils.java | 38 ------------ .../src/main/java/alluxio/web/CORSFilter.java | 56 ++++++++++++++++++ .../src/main/java/alluxio/web/HttpFilter.java | 59 +++++++++++++++++++ .../src/main/java/alluxio/web/WebServer.java | 5 ++ docs/_data/table/cn/common-configuration.yml | 14 +++++ docs/_data/table/common-configuration.csv | 6 ++ docs/_data/table/en/common-configuration.yml | 14 ++++- 8 files changed, 203 insertions(+), 39 deletions(-) create mode 100644 core/server/common/src/main/java/alluxio/web/CORSFilter.java create mode 100644 core/server/common/src/main/java/alluxio/web/HttpFilter.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 9d7265ab2bef..03ebac9000c3 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -836,6 +836,50 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey WEB_CORS_ALLOW_CREDENTIAL = + booleanBuilder(Name.WEB_CORS_ALLOW_CREDENTIAL) + .setDefaultValue(false) + .setDescription("Enable request include credential.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_HEADERS = + stringBuilder(Name.WEB_CORS_ALLOW_HEADERS) + .setDefaultValue("*") + .setDescription("Which headers is allowed for cors. use * allow all any header.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_METHODS = + stringBuilder(Name.WEB_CORS_ALLOW_METHODS) + .setDefaultValue("*") + .setDescription("Which methods is allowed for cors. 
use * allow all any method.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_ALLOW_ORIGINS = + stringBuilder(Name.WEB_CORS_ALLOW_ORIGINS) + .setDefaultValue("*") + .setDescription("Which origins is allowed for cors. use * allow all any origin.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_EXPOSED_HEADERS = + stringBuilder(Name.WEB_CORS_EXPOSED_HEADERS) + .setDefaultValue("*") + .setDescription("Which headers are allowed to set in response when access " + + "cross-origin resource. use * allow all any header.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey WEB_CORS_MAX_AGE = + intBuilder(Name.WEB_CORS_MAX_AGE) + .setDefaultValue(-1) + .setDescription("Maximum number of seconds the results can be cached. " + + "-1 means no cache.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey WEB_REFRESH_INTERVAL = durationBuilder(Name.WEB_REFRESH_INTERVAL) .setDefaultValue("15s") @@ -7247,6 +7291,12 @@ public static final class Name { public static final String WEB_RESOURCES = "alluxio.web.resources"; public static final String WEB_THREADS = "alluxio.web.threads"; public static final String WEB_CORS_ENABLED = "alluxio.web.cors.enabled"; + public static final String WEB_CORS_ALLOW_CREDENTIAL = "alluxio.web.cors.allow.credential"; + public static final String WEB_CORS_ALLOW_HEADERS = "alluxio.web.cors.allow.headers"; + public static final String WEB_CORS_ALLOW_METHODS = "alluxio.web.cors.allow.methods"; + public static final String WEB_CORS_ALLOW_ORIGINS = "alluxio.web.cors.allow.origins"; + public static final String WEB_CORS_EXPOSED_HEADERS = "alluxio.web.cors.exposed.headers"; + public static final String WEB_CORS_MAX_AGE = 
"alluxio.web.cors.max.age"; public static final String WEB_REFRESH_INTERVAL = "alluxio.web.refresh.interval"; public static final String WEB_THREAD_DUMP_TO_LOG = "alluxio.web.threaddump.log.enabled"; public static final String WEB_UI_ENABLED = "alluxio.web.ui.enabled"; diff --git a/core/server/common/src/main/java/alluxio/RestUtils.java b/core/server/common/src/main/java/alluxio/RestUtils.java index aafd4d8c2b17..714bc297b352 100644 --- a/core/server/common/src/main/java/alluxio/RestUtils.java +++ b/core/server/common/src/main/java/alluxio/RestUtils.java @@ -12,7 +12,6 @@ package alluxio; import alluxio.conf.AlluxioConfiguration; -import alluxio.conf.PropertyKey; import alluxio.exception.status.AlluxioStatusException; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.ServerUserState; @@ -119,10 +118,6 @@ private static Response createResponse(Object object, AlluxioConfiguration allux headers.forEach(rb::header); } - if (alluxioConf.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { - return makeCORS(rb).build(); - } - return rb.build(); } @@ -175,43 +170,10 @@ private static Response createErrorResponse(Exception e, AlluxioConfiguration al ErrorResponse response = new ErrorResponse(se.getStatus().getCode(), se.getMessage()); Response.ResponseBuilder rb = Response.serverError().entity(response); - if (alluxioConf.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { - return makeCORS(rb).build(); - } return rb.build(); } - /** - * Makes the responseBuilder CORS compatible. - * - * @param responseBuilder the response builder - * @param returnMethod the modified response builder - * @return response builder - */ - public static Response.ResponseBuilder makeCORS(Response.ResponseBuilder responseBuilder, - String returnMethod) { - // TODO(william): Make origin, methods, and headers configurable. 
- Response.ResponseBuilder rb = responseBuilder.header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); - - if (!"".equals(returnMethod)) { - rb.header("Access-Control-Allow-Headers", returnMethod); - } - - return rb; - } - - /** - * Makes the responseBuilder CORS compatible, assumes default methods. - * - * @param responseBuilder the modified response builder - * @return response builder - */ - public static Response.ResponseBuilder makeCORS(Response.ResponseBuilder responseBuilder) { - return makeCORS(responseBuilder, ""); - } - private RestUtils() { } // prevent instantiation } diff --git a/core/server/common/src/main/java/alluxio/web/CORSFilter.java b/core/server/common/src/main/java/alluxio/web/CORSFilter.java new file mode 100644 index 000000000000..b3dbcd171ca8 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/web/CORSFilter.java @@ -0,0 +1,56 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.web; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import org.apache.commons.lang3.StringUtils; + +import java.io.IOException; +import javax.servlet.FilterChain; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * A filter for adding the Cors header to the http header. 
+ */ +public class CORSFilter extends HttpFilter { + @Override + public void doFilter(HttpServletRequest req, HttpServletResponse resp, FilterChain chain) + throws IOException, ServletException { + if (Configuration.getBoolean(PropertyKey.WEB_CORS_ENABLED)) { + String allowOrigins = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_ORIGINS); + String allowMethods = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_METHODS); + String allowHeaders = Configuration.getString(PropertyKey.WEB_CORS_ALLOW_HEADERS); + String exposeHeaders = Configuration.getString(PropertyKey.WEB_CORS_EXPOSED_HEADERS); + boolean allowCredential = Configuration.getBoolean( + PropertyKey.WEB_CORS_ALLOW_CREDENTIAL); + int maxAge = Configuration.getInt(PropertyKey.WEB_CORS_MAX_AGE); + + if (!StringUtils.equals(allowOrigins, "*")) { + resp.addHeader("Vary", "Origin"); + } + + resp.setHeader("Access-Control-Allow-Origin", allowOrigins); + resp.setHeader("Access-Control-Allow-Headers", allowHeaders); + resp.setHeader("Access-Control-Allow-Methods", allowMethods); + resp.setHeader("Access-Control-Max-Age", String.valueOf(maxAge)); + resp.setHeader("Access-Control-Expose-Headers", exposeHeaders); + if (allowCredential) { + resp.setHeader("Access-Control-Allow-Credentials", "true"); + } + } + chain.doFilter(req, resp); + } +} diff --git a/core/server/common/src/main/java/alluxio/web/HttpFilter.java b/core/server/common/src/main/java/alluxio/web/HttpFilter.java new file mode 100644 index 000000000000..b09ebaa89daf --- /dev/null +++ b/core/server/common/src/main/java/alluxio/web/HttpFilter.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.web; + +import java.io.IOException; +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +/** + * Provides an abstract class to be subclassed to create an HTTP filter. + */ +public abstract class HttpFilter implements Filter { + @Override + public void init(FilterConfig filterConfig) throws ServletException { + } + + @Override + public void destroy() { + } + + @Override + public final void doFilter(ServletRequest req, ServletResponse resp, FilterChain chain) + throws IOException, ServletException { + if (!(req instanceof HttpServletRequest && resp instanceof HttpServletResponse)) { + throw new ServletException( + String.format("Received non-HTTP request or response: req=%s, resp=%s", + req.getClass(), resp.getClass())); + } + + HttpServletRequest request = (HttpServletRequest) req; + HttpServletResponse response = (HttpServletResponse) resp; + + doFilter(request, response, chain); + } + + /** + * Receives standard HTTP requests from the public doFilter method. 
+ * @param req http request + * @param resp http response + * @param chain filter chain + */ + public abstract void doFilter(HttpServletRequest req, HttpServletResponse resp, FilterChain chain) + throws IOException, ServletException; +} diff --git a/core/server/common/src/main/java/alluxio/web/WebServer.java b/core/server/common/src/main/java/alluxio/web/WebServer.java index 4a5527dc41bb..971352fb53d1 100644 --- a/core/server/common/src/main/java/alluxio/web/WebServer.java +++ b/core/server/common/src/main/java/alluxio/web/WebServer.java @@ -38,7 +38,9 @@ import java.io.IOException; import java.net.InetSocketAddress; +import java.util.EnumSet; import javax.annotation.concurrent.NotThreadSafe; +import javax.servlet.DispatcherType; /** * Class that bootstraps and starts a web server. @@ -112,6 +114,9 @@ public WebServer(String serviceName, InetSocketAddress address) { } mServletContextHandler.addServlet(StacksServlet.class, THREAD_DUMP_PATH); mServletContextHandler.addServlet(JmxServlet.class, JMX_PATH); + mServletContextHandler.addFilter(CORSFilter.class, "/*", + EnumSet.of(DispatcherType.REQUEST, DispatcherType.FORWARD, DispatcherType.INCLUDE, + DispatcherType.ASYNC, DispatcherType.ERROR)); HandlerList handlers = new HandlerList(); handlers.setHandlers(new Handler[] {mMetricsServlet.getHandler(), mPMetricsServlet.getHandler(), mServletContextHandler, new DefaultHandler()}); diff --git a/docs/_data/table/cn/common-configuration.yml b/docs/_data/table/cn/common-configuration.yml index 33a5e268b1be..538724aba0a3 100644 --- a/docs/_data/table/cn/common-configuration.yml +++ b/docs/_data/table/cn/common-configuration.yml @@ -128,6 +128,20 @@ alluxio.underfs.s3a.signer.algorithm: 签名算法,用于向s3服务签名请求。这是可选的,如果没有设置,客户端将自动确定它。若要与仅支持v2签名的S3端点交互,请将其设置为"S3SignerType"。 alluxio.underfs.s3a.socket.timeout: 与S3通信时socket超时的时间长度。 +alluxio.web.cors.allow.credential: + 是否可以将对请求的响应暴露给页面。 +alluxio.web.cors.allow.headers: + 哪些头部信息是允许的,使用 * 允许所有的任何头部信息。 +alluxio.web.cors.allow.methods: + 
哪些方法是允许的,使用 * 允许所有的任何方法。 +alluxio.web.cors.allow.origins: + 哪些 Origin 是允许的,使用 * 允许所有的任何 Origin。 +alluxio.web.cors.enabled: + 启用 Web 用户界面的 CORS 配置 +alluxio.web.cors.exposed.headers: + 在访问跨源资源时,允许在响应中设置哪些头部信息。 使用 * 允许所有头部信息。 +alluxio.web.cors.max.age: + 在访问跨源资源时,结果可以被缓存的最大秒数。"-1表示不缓存。 alluxio.web.resources: web应用资源路径。 alluxio.web.threads: diff --git a/docs/_data/table/common-configuration.csv b/docs/_data/table/common-configuration.csv index 1bad2686d28d..76fde1545838 100644 --- a/docs/_data/table/common-configuration.csv +++ b/docs/_data/table/common-configuration.csv @@ -188,6 +188,12 @@ alluxio.underfs.web.header.last.modified,"EEE, dd MMM yyyy HH:mm:ss zzz" alluxio.underfs.web.parent.names,"Parent Directory,..,../" alluxio.underfs.web.titles,"Index of,Directory listing for" alluxio.web.cors.enabled,"false" +alluxio.web.cors.allow.credential,"false" +alluxio.web.cors.allow.headers,"*" +alluxio.web.cors.allow.methods,"*" +alluxio.web.cors.allow.origins,"*" +alluxio.web.cors.exposed.headers,"*" +alluxio.web.cors.max.age,"-1" alluxio.web.file.info.enabled,"true" alluxio.web.refresh.interval,"15s" alluxio.web.threaddump.log.enabled,"false" diff --git a/docs/_data/table/en/common-configuration.yml b/docs/_data/table/en/common-configuration.yml index 6f595cd678dc..a689a848f8fc 100644 --- a/docs/_data/table/en/common-configuration.yml +++ b/docs/_data/table/en/common-configuration.yml @@ -374,8 +374,20 @@ alluxio.underfs.web.parent.names: 'The text of the http link for the parent directory.' alluxio.underfs.web.titles: 'The title of the content for a http url.' +alluxio.web.cors.allow.credential: + 'Enable request include credential.' +alluxio.web.cors.allow.headers: + 'Which headers is allowed for cors. use * allow all any header.' +alluxio.web.cors.allow.methods: + 'Which methods is allowed for cors. use * allow all any method.' +alluxio.web.cors.allow.origins: + 'Which origins is allowed for cors. use * allow all any origin.' 
alluxio.web.cors.enabled: - 'Set to true to enable Cross-Origin Resource Sharing for RESTful APIendpoints.' + 'Set to true to enable Cross-Origin Resource Sharing for Web UI.' +alluxio.web.cors.exposed.headers: + 'Which headers are allowed to set in response when access cross-origin resource. use * allow all any header.' +alluxio.web.cors.max.age: + 'Maximum number of seconds the results can be cached for cors. "-1 means no cache.' alluxio.web.file.info.enabled: 'Whether detailed file information are enabled for the web UI.' alluxio.web.refresh.interval: From 1e95ed69ab9bec895b3bbfe4a2841238cbedc824 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sun, 11 Dec 2022 15:29:54 +0800 Subject: [PATCH 004/334] Bump ratis version to 2.4.1 ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#16577 change-id: cid-4c4d55e101d3737b2f0f15b78f23016309024cf7 --- core/server/common/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/common/pom.xml b/core/server/common/pom.xml index 457fb9013a5e..438d656008fc 100644 --- a/core/server/common/pom.xml +++ b/core/server/common/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.parent.basedir}/build - 2.4.0 + 2.4.1 From 1adfdd44d396260eabc85820e1c5bc08e2594805 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Thu, 15 Dec 2022 22:45:24 +0800 Subject: [PATCH 005/334] Upgrade protobuf to 3.19.6 Upgrade to resolve a resource consumption problem in https://cwe.mitre.org/data/definitions/400.html pr-link: Alluxio/alluxio#16645 change-id: cid-7e49b4bf6f45dca44a033f1d1228a2baa7572bfc --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e77b90bad8b7..6674e76e2665 100644 --- a/pom.xml +++ b/pom.xml @@ -149,7 +149,7 @@ 0.8.0 31.0.1-jre 1.11.0 - 3.19.2 + 3.19.6 UTF-8 1.7.30 2.13.3 From 017078481f57e3499855b423740d3c6f227c47ab Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Thu, 15 Dec 2022 22:46:53 +0800 Subject: [PATCH 006/334] Support log source ip to rpc debug log In the debug log, client IP will be logged pr-link: Alluxio/alluxio#16586 change-id: cid-ecd6f1da1ac9f6be7e8a49d968415e1538eb2e42 --- .../common/src/main/java/alluxio/RpcUtils.java | 4 +++- .../alluxio/master/block/DefaultBlockMaster.java | 10 ++++++++-- .../master/file/DefaultFileSystemMaster.java | 13 ++++++++----- .../master/journal/DefaultJournalMaster.java | 7 ++++++- .../alluxio/master/meta/DefaultMetaMaster.java | 14 +++++++++++--- .../master/metrics/DefaultMetricsMaster.java | 6 +++++- .../alluxio/master/table/DefaultTableMaster.java | 6 +++++- 7 files changed, 46 insertions(+), 14 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/RpcUtils.java b/core/server/common/src/main/java/alluxio/RpcUtils.java index 
b4698c111c46..1a8f77d4493e 100644 --- a/core/server/common/src/main/java/alluxio/RpcUtils.java +++ b/core/server/common/src/main/java/alluxio/RpcUtils.java @@ -21,6 +21,7 @@ import alluxio.metrics.MetricsSystem; import alluxio.security.User; import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.security.authentication.ClientIpAddressInjector; import com.codahale.metrics.Timer; import io.grpc.StatusException; @@ -121,7 +122,8 @@ public static T callAndReturn(Logger logger, RpcCallableThrowsIOException MetricsSystem.timer(MetricKey.MASTER_TOTAL_RPCS.getName()), MetricsSystem.timer(getQualifiedMetricName(methodName)))) { MetricsSystem.counter(getQualifiedInProgressMetricName(methodName)).inc(); - logger.debug("Enter: {}: {}", methodName, debugDesc); + logger.debug("Enter: {} from {}: {}", methodName, ClientIpAddressInjector.getIpAddress(), + debugDesc); T res = callable.call(); logger.debug("Exit: {}: {}", methodName, debugDesc); return res; diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 7a44abbb0611..f78032c5c600 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -64,6 +64,7 @@ import alluxio.proto.meta.Block.BlockMeta; import alluxio.resource.CloseableIterator; import alluxio.resource.LockResource; +import alluxio.security.authentication.ClientIpAddressInjector; import alluxio.util.CommonUtils; import alluxio.util.IdUtils; import alluxio.util.ThreadFactoryUtils; @@ -83,6 +84,7 @@ import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableSet; import com.google.common.util.concurrent.Striped; +import io.grpc.ServerInterceptors; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -344,9 +346,13 @@ public 
String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.BLOCK_MASTER_CLIENT_SERVICE, - new GrpcService(new BlockMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors + .intercept(new BlockMasterClientServiceHandler(this), + new ClientIpAddressInjector()))); services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE, - new GrpcService(new BlockMasterWorkerServiceHandler(this))); + new GrpcService(ServerInterceptors + .intercept(new BlockMasterWorkerServiceHandler(this), + new ClientIpAddressInjector()))); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index fe2a2e92324a..ca2ac9058136 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -561,11 +561,14 @@ private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors - .intercept(new FileSystemMasterClientServiceHandler(this), new ClientIpAddressInjector()))); - services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, - new GrpcService(new FileSystemMasterJobServiceHandler(this))); - services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE, - new GrpcService(new FileSystemMasterWorkerServiceHandler(this))); + .intercept(new FileSystemMasterClientServiceHandler(this), + new ClientIpAddressInjector()))); + services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, new GrpcService(ServerInterceptors + .intercept(new FileSystemMasterJobServiceHandler(this), + new ClientIpAddressInjector()))); + services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE, new GrpcService(ServerInterceptors + .intercept(new 
FileSystemMasterWorkerServiceHandler(this), + new ClientIpAddressInjector()))); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java index aa74350e257a..09d81fdaf570 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java @@ -24,8 +24,11 @@ import alluxio.master.MasterContext; import alluxio.master.PrimarySelector; import alluxio.master.journal.raft.RaftJournalSystem; +import alluxio.security.authentication.ClientIpAddressInjector; import alluxio.util.executor.ExecutorServiceFactories; +import io.grpc.ServerInterceptors; + import java.io.IOException; import java.util.HashMap; import java.util.Map; @@ -111,7 +114,9 @@ public String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(alluxio.grpc.ServiceType.JOURNAL_MASTER_CLIENT_SERVICE, - new GrpcService(new JournalMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new JournalMasterClientServiceHandler(this), + new ClientIpAddressInjector()))); return services; } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 8b1631c4f43e..6d806a44f091 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -52,6 +52,7 @@ import alluxio.proto.journal.Journal; import alluxio.proto.journal.Meta; import alluxio.resource.CloseableIterator; +import alluxio.security.authentication.ClientIpAddressInjector; import alluxio.underfs.UfsManager; import alluxio.util.ConfigurationUtils; import alluxio.util.IdUtils; @@ -66,6 +67,7 @@ import alluxio.wire.ConfigHash; import 
com.google.common.collect.ImmutableSet; +import io.grpc.ServerInterceptors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -256,11 +258,17 @@ public CloseableIterator getJournalEntryIterator() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.META_MASTER_CONFIG_SERVICE, - new GrpcService(new MetaMasterConfigurationServiceHandler(this)).disableAuthentication()); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterConfigurationServiceHandler(this), + new ClientIpAddressInjector())).disableAuthentication()); services.put(ServiceType.META_MASTER_CLIENT_SERVICE, - new GrpcService(new MetaMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterClientServiceHandler(this), + new ClientIpAddressInjector()))); services.put(ServiceType.META_MASTER_MASTER_SERVICE, - new GrpcService(new MetaMasterMasterServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new MetaMasterMasterServiceHandler(this), + new ClientIpAddressInjector()))); // Add backup role services. 
services.putAll(mBackupRole.getRoleServices()); services.putAll(mJournalSystem.getJournalServices()); diff --git a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java index c21186140b8c..b302ecb81638 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java @@ -30,11 +30,13 @@ import alluxio.metrics.MetricsSystem; import alluxio.metrics.MultiValueMetricsAggregator; import alluxio.metrics.aggregator.SingleTagValueAggregator; +import alluxio.security.authentication.ClientIpAddressInjector; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; +import io.grpc.ServerInterceptors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -164,7 +166,9 @@ public String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.METRICS_MASTER_CLIENT_SERVICE, - new GrpcService(getMasterServiceHandler())); + new GrpcService(ServerInterceptors.intercept( + getMasterServiceHandler(), + new ClientIpAddressInjector()))); return services; } diff --git a/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java b/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java index 7643c847d257..7faa51855293 100644 --- a/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java +++ b/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java @@ -34,11 +34,13 @@ import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.table.transform.TransformJobInfo; import alluxio.master.table.transform.TransformManager; +import 
alluxio.security.authentication.ClientIpAddressInjector; import alluxio.table.common.transform.TransformDefinition; import alluxio.util.executor.ExecutorServiceFactories; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import io.grpc.ServerInterceptors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,7 +180,9 @@ public String getName() { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.TABLE_MASTER_CLIENT_SERVICE, - new GrpcService(new TableMasterClientServiceHandler(this))); + new GrpcService(ServerInterceptors.intercept( + new TableMasterClientServiceHandler(this), + new ClientIpAddressInjector()))); return services; } From 82283414fb2987bea4c6bf6fe59b343f7ce79856 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 16 Dec 2022 20:07:42 +0800 Subject: [PATCH 007/334] Add a command to set DirectChildrenLoaded on dir ``` $ bin/alluxio fs syncDirNextTime true /data Successfully marked the dir /data to trigger metadata sync on next access $ bin/alluxio fs syncDirNextTime false /data Successfully marked the dir /data to skip metadata sync on next access ``` pr-link: Alluxio/alluxio#16542 change-id: cid-78744676d83727f380d4c7a4ef7206044001a5fa --- .../master/file/DefaultFileSystemMaster.java | 4 + .../alluxio/master/file/meta/InodeTree.java | 14 +++- .../main/proto/grpc/file_system_master.proto | 1 + .../fs/command/FileSystemCommandUtils.java | 17 ++++ .../fs/command/SyncDirNextTimeCommand.java | 81 +++++++++++++++++++ 5 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 shell/src/main/java/alluxio/cli/fs/command/SyncDirNextTimeCommand.java diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index ca2ac9058136..179096db687e 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java 
+++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -4074,6 +4074,10 @@ protected void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath ino throws FileDoesNotExistException, InvalidPathException, AccessControlException { Inode inode = inodePath.getInode(); SetAttributePOptions.Builder protoOptions = context.getOptions(); + if (inode.isDirectory() && protoOptions.hasDirectChildrenLoaded()) { + mInodeTree.setDirectChildrenLoaded( + rpcContext, inode.asDirectory(), protoOptions.getDirectChildrenLoaded()); + } if (protoOptions.hasPinned()) { mInodeTree.setPinned(rpcContext, inodePath, context.getOptions().getPinned(), context.getOptions().getPinnedMediaList(), opTimeMs); diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java index 327c961427d7..9c4a45d172b2 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java @@ -293,9 +293,21 @@ public Map getFileSizeHistogram() { * @param dir the inode directory */ public void setDirectChildrenLoaded(Supplier context, InodeDirectory dir) { + setDirectChildrenLoaded(context, dir, true); + } + + /** + * Marks an inode directory as having its direct children loaded or not. 
+ * + * @param context journal context supplier + * @param dir the inode directory + * @param directChildrenLoaded whether to load the direct children if they were not loaded before + */ + public void setDirectChildrenLoaded(Supplier context, InodeDirectory dir, + boolean directChildrenLoaded) { mState.applyAndJournal(context, UpdateInodeDirectoryEntry.newBuilder() .setId(dir.getId()) - .setDirectChildrenLoaded(true) + .setDirectChildrenLoaded(directChildrenLoaded) .build()); } diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 211adc60a34f..b516e418648f 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -473,6 +473,7 @@ message SetAttributePOptions { repeated string pinnedMedia = 10; map xattr = 11; optional alluxio.proto.journal.XAttrUpdateStrategy xattrUpdateStrategy = 12; + optional bool directChildrenLoaded = 13; } message SetAttributePRequest { /** the path of the file */ diff --git a/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java b/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java index 4e7fc9979c9f..6b9728190a96 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java @@ -68,4 +68,21 @@ public static void setPinned(FileSystem fs, AlluxioURI path, boolean pinned, .build(); fs.setAttribute(path, options); } + + /** + * Sets direct children loaded. 
+ * + * @param fs The {@link FileSystem} client + * @param path The {@link AlluxioURI} path as the input of the command + * @param directChildrenLoaded true or false + */ + public static void setDirectChildrenLoaded(FileSystem fs, AlluxioURI path, + boolean directChildrenLoaded) + throws AlluxioException, IOException { + SetAttributePOptions options = SetAttributePOptions.newBuilder() + .setRecursive(false) + .setDirectChildrenLoaded(directChildrenLoaded) + .build(); + fs.setAttribute(path, options); + } } diff --git a/shell/src/main/java/alluxio/cli/fs/command/SyncDirNextTimeCommand.java b/shell/src/main/java/alluxio/cli/fs/command/SyncDirNextTimeCommand.java new file mode 100644 index 000000000000..14a69bdc55d2 --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fs/command/SyncDirNextTimeCommand.java @@ -0,0 +1,81 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.cli.fs.command; + +import alluxio.AlluxioURI; +import alluxio.annotation.PublicApi; +import alluxio.cli.CommandUtils; +import alluxio.client.file.FileSystemContext; +import alluxio.exception.AlluxioException; +import alluxio.exception.status.InvalidArgumentException; + +import org.apache.commons.cli.CommandLine; + +import java.io.IOException; +import javax.annotation.concurrent.ThreadSafe; + +/** + * Sync direct children next time command. 
+ */ +@ThreadSafe +@PublicApi +public final class SyncDirNextTimeCommand extends AbstractFileSystemCommand { + + private boolean mSyncNextTime; + + /** + * @param fsContext the filesystem of Alluxio + */ + public SyncDirNextTimeCommand(FileSystemContext fsContext) { + super(fsContext); + } + + @Override + public String getCommandName() { + return "syncDirNextTime"; + } + + @Override + public void validateArgs(CommandLine cl) throws InvalidArgumentException { + CommandUtils.checkNumOfArgsEquals(this, cl, 2); + } + + @Override + protected void runPlainPath(AlluxioURI path, CommandLine cl) + throws AlluxioException, IOException { + FileSystemCommandUtils.setDirectChildrenLoaded(mFileSystem, path, mSyncNextTime); + System.out.format("Successfully marked the dir %s to %s%n", path, + mSyncNextTime ? "trigger metadata sync on next access" + : "skip metadata sync on next access"); + } + + @Override + public int run(CommandLine cl) throws AlluxioException, IOException { + String[] args = cl.getArgs(); + mSyncNextTime = Boolean.parseBoolean(args[0]); + runWildCardCmd(new AlluxioURI(args[1]), cl); + return 0; + } + + @Override + public String getUsage() { + return "syncDirNextTime \n" + + "\ttrue means the next access will trigger a metadata sync on the dir" + + "\tfalse means the next metadata sync is disabled"; + } + + @Override + public String getDescription() { + return "Marks a directory to either trigger a metadata sync or skip the " + + "metadata sync on next access."; + } +} From a318f8869081c2616fb136b448fc5772c98ef06d Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Mon, 19 Dec 2022 10:39:37 +0800 Subject: [PATCH 008/334] [DOCFIX] Add some missing comments related to BlockStore ### What changes are proposed in this pull request? In the implementation classes associated with BlockStore, some comments are missing. The purpose of this pr is to complement them. Details: https://github.com/Alluxio/alluxio/issues/16648 ### Why are the changes needed? 
Because some important comments are missing. ### Does this PR introduce any user facing changes? For the user, there is no impact. pr-link: Alluxio/alluxio#16649 change-id: cid-7015f0afb5efeec56492d7a29b21196620f786c7 --- .../main/java/alluxio/worker/block/MonoBlockStore.java | 8 ++++---- .../main/java/alluxio/worker/page/PagedBlockStore.java | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 187cbdcd7cec..28bb9abd6dee 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -76,10 +76,10 @@ public class MonoBlockStore implements BlockStore { /** * Constructor of MonoBlockStore. * - * @param localBlockStore - * @param blockMasterClientPool - * @param ufsManager - * @param workerId + * @param localBlockStore the local block store + * @param blockMasterClientPool a client pool for talking to the block master + * @param ufsManager the UFS manager + * @param workerId the worker id */ public MonoBlockStore(LocalBlockStore localBlockStore, BlockMasterClientPool blockMasterClientPool, diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index 90837874c864..16d9daacfdb5 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -88,9 +88,9 @@ public class PagedBlockStore implements BlockStore { /** * Create an instance of PagedBlockStore. 
- * @param ufsManager - * @param pool - * @param workerId + * @param ufsManager the UFS manager + * @param pool a client pool for talking to the block master + * @param workerId the worker id * @return an instance of PagedBlockStore */ public static PagedBlockStore create(UfsManager ufsManager, BlockMasterClientPool pool, From ff5f8233f23839f0363112de6a6df0802061b33e Mon Sep 17 00:00:00 2001 From: flaming archer <2011xuesong@gmail.com> Date: Mon, 19 Dec 2022 16:19:37 +0800 Subject: [PATCH 009/334] Fix fuse intellij wrong option ### What changes are proposed in this pull request? There is no option named -r. pr-link: Alluxio/alluxio#16628 change-id: cid-524d2c715a2e45ebfa51a9d9559238cf55b99112 --- dev/intellij/runConfigurations/AlluxioFuse.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/intellij/runConfigurations/AlluxioFuse.xml b/dev/intellij/runConfigurations/AlluxioFuse.xml index 8932ba8179ff..5c0325a6b1fe 100644 --- a/dev/intellij/runConfigurations/AlluxioFuse.xml +++ b/dev/intellij/runConfigurations/AlluxioFuse.xml @@ -15,7 +15,7 @@ diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java index 2398ab8f5136..c47fd7579c2a 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java @@ -41,6 +41,7 @@ import java.io.OutputStream; import java.util.Date; import java.util.List; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -59,6 +60,8 @@ public class OSSUnderFileSystem extends ObjectUnderFileSystem { /** Bucket name of user's configured Alluxio bucket. */ private final String mBucketName; + private StsOssClientProvider mClientProvider; + /** * Constructs a new instance of {@link OSSUnderFileSystem}.
* @@ -69,20 +72,7 @@ public class OSSUnderFileSystem extends ObjectUnderFileSystem { public static OSSUnderFileSystem createInstance(AlluxioURI uri, UnderFileSystemConfiguration conf) throws Exception { String bucketName = UnderFileSystemUtils.getBucketName(uri); - Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_ACCESS_KEY), - "Property %s is required to connect to OSS", PropertyKey.OSS_ACCESS_KEY); - Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_SECRET_KEY), - "Property %s is required to connect to OSS", PropertyKey.OSS_SECRET_KEY); - Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_ENDPOINT_KEY), - "Property %s is required to connect to OSS", PropertyKey.OSS_ENDPOINT_KEY); - String accessId = conf.getString(PropertyKey.OSS_ACCESS_KEY); - String accessKey = conf.getString(PropertyKey.OSS_SECRET_KEY); - String endPoint = conf.getString(PropertyKey.OSS_ENDPOINT_KEY); - - ClientBuilderConfiguration ossClientConf = initializeOSSClientConfig(conf); - OSS ossClient = new OSSClientBuilder().build(endPoint, accessId, accessKey, ossClientConf); - - return new OSSUnderFileSystem(uri, ossClient, bucketName, conf); + return new OSSUnderFileSystem(uri, null, bucketName, conf); } /** @@ -93,10 +83,36 @@ public static OSSUnderFileSystem createInstance(AlluxioURI uri, UnderFileSystemC * @param bucketName bucket name of user's configured Alluxio bucket * @param conf configuration for this UFS */ - protected OSSUnderFileSystem(AlluxioURI uri, OSS ossClient, String bucketName, - UnderFileSystemConfiguration conf) { + protected OSSUnderFileSystem(AlluxioURI uri, @Nullable OSS ossClient, String bucketName, + UnderFileSystemConfiguration conf) { super(uri, conf); - mClient = ossClient; + + if (conf.getBoolean(PropertyKey.UNDERFS_OSS_STS_ENABLED)) { + try { + mClientProvider = new StsOssClientProvider(conf); + mClientProvider.init(); + mClient = mClientProvider.getOSSClient(); + } catch (IOException e) { + LOG.error("init sts client provider failed!", e); + throw 
new ServiceException(e); + } + } else if (null != ossClient) { + mClient = ossClient; + } else { + Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_ACCESS_KEY), + "Property %s is required to connect to OSS", PropertyKey.OSS_ACCESS_KEY); + Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_SECRET_KEY), + "Property %s is required to connect to OSS", PropertyKey.OSS_SECRET_KEY); + Preconditions.checkArgument(conf.isSet(PropertyKey.OSS_ENDPOINT_KEY), + "Property %s is required to connect to OSS", PropertyKey.OSS_ENDPOINT_KEY); + String accessId = conf.getString(PropertyKey.OSS_ACCESS_KEY); + String accessKey = conf.getString(PropertyKey.OSS_SECRET_KEY); + String endPoint = conf.getString(PropertyKey.OSS_ENDPOINT_KEY); + + ClientBuilderConfiguration ossClientConf = initializeOSSClientConfig(conf); + mClient = new OSSClientBuilder().build(endPoint, accessId, accessKey, ossClientConf); + } + mBucketName = bucketName; } @@ -268,10 +284,10 @@ protected String getRootKey() { /** * Creates an OSS {@code ClientConfiguration} using an Alluxio Configuration. 
- * + * @param alluxioConf the OSS Configuration * @return the OSS {@link ClientBuilderConfiguration} */ - private static ClientBuilderConfiguration initializeOSSClientConfig( + public static ClientBuilderConfiguration initializeOSSClientConfig( AlluxioConfiguration alluxioConf) { ClientBuilderConfiguration ossClientConf = new ClientBuilderConfiguration(); ossClientConf @@ -279,6 +295,7 @@ private static ClientBuilderConfiguration initializeOSSClientConfig( ossClientConf.setSocketTimeout((int) alluxioConf.getMs(PropertyKey.UNDERFS_OSS_SOCKET_TIMEOUT)); ossClientConf.setConnectionTTL(alluxioConf.getMs(PropertyKey.UNDERFS_OSS_CONNECT_TTL)); ossClientConf.setMaxConnections(alluxioConf.getInt(PropertyKey.UNDERFS_OSS_CONNECT_MAX)); + ossClientConf.setMaxErrorRetry(alluxioConf.getInt(PropertyKey.UNDERFS_OSS_RETRY_MAX)); return ossClientConf; } @@ -292,4 +309,10 @@ protected InputStream openObject(String key, OpenOptions options, RetryPolicy re throw new IOException(e.getMessage()); } } + + @Override + public void close() throws IOException { + super.close(); + mClientProvider.close(); + } } diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystemFactory.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystemFactory.java index 9788e96675e2..07ee2098502e 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystemFactory.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystemFactory.java @@ -62,6 +62,10 @@ public boolean supportsPath(String path) { * @return true if both access, secret and endpoint keys are present, false otherwise */ private boolean checkOSSCredentials(UnderFileSystemConfiguration conf) { + if (conf.getBoolean(PropertyKey.UNDERFS_OSS_STS_ENABLED)) { + return conf.isSet(PropertyKey.UNDERFS_OSS_ECS_RAM_ROLE); + } + return conf.isSet(PropertyKey.OSS_ACCESS_KEY) && conf.isSet(PropertyKey.OSS_SECRET_KEY) && conf.isSet(PropertyKey.OSS_ENDPOINT_KEY); diff --git 
a/underfs/oss/src/main/java/alluxio/underfs/oss/StsOssClientProvider.java b/underfs/oss/src/main/java/alluxio/underfs/oss/StsOssClientProvider.java new file mode 100644 index 000000000000..78edb3e00158 --- /dev/null +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/StsOssClientProvider.java @@ -0,0 +1,190 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.oss; + +import alluxio.conf.PropertyKey; +import alluxio.retry.ExponentialBackoffRetry; +import alluxio.retry.RetryPolicy; +import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.network.HttpUtils; + +import com.aliyun.oss.ClientBuilderConfiguration; +import com.aliyun.oss.OSS; +import com.aliyun.oss.OSSClientBuilder; +import com.aliyun.oss.common.auth.DefaultCredentials; +import com.google.common.annotations.VisibleForTesting; +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +/** + * STS client provider for Aliyun OSS. 
+ */ +public class StsOssClientProvider implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(StsOssClientProvider.class); + + private static final int ECS_META_GET_TIMEOUT = 10000; + private static final int BASE_SLEEP_TIME_MS = 1000; + private static final int MAX_SLEEP_MS = 3000; + private static final int MAX_RETRIES = 5; + private static final String ACCESS_KEY_ID = "AccessKeyId"; + private static final String ACCESS_KEY_SECRET = "AccessKeySecret"; + private static final String SECURITY_TOKEN = "SecurityToken"; + private static final String EXPIRATION = "Expiration"; + + private volatile OSS mOssClient = null; + private long mStsTokenExpiration = 0; + private final String mEcsMetadataServiceUrl; + private final long mTokenTimeoutMs; + private final UnderFileSystemConfiguration mOssConf; + private final ScheduledExecutorService mRefreshOssClientScheduledThread; + private OSSClientBuilder mOssClientBuilder = new OSSClientBuilder(); + + /** + * Constructs a new instance of {@link StsOssClientProvider}. + * @param ossConfiguration {@link UnderFileSystemConfiguration} for OSS + */ + public StsOssClientProvider(UnderFileSystemConfiguration ossConfiguration) { + mOssConf = ossConfiguration; + mEcsMetadataServiceUrl = ossConfiguration.getString( + PropertyKey.UNDERFS_OSS_STS_ECS_METADATA_SERVICE_ENDPOINT); + mTokenTimeoutMs = ossConfiguration.getMs(PropertyKey.UNDERFS_OSS_STS_TOKEN_REFRESH_INTERVAL_MS); + + mRefreshOssClientScheduledThread = Executors.newSingleThreadScheduledExecutor( + ThreadFactoryUtils.build("refresh_oss_client-%d", false)); + mRefreshOssClientScheduledThread.scheduleAtFixedRate(() -> { + try { + createOrRefreshOssStsClient(mOssConf); + } catch (Exception e) { + //retry it + LOG.warn("exception when refreshing OSS client access token", e); + } + }, 0, 60000, TimeUnit.MILLISECONDS); + } + + /** + * Init {@link StsOssClientProvider}. 
+ * @throws IOException if failed to init OSS Client + */ + public void init() throws IOException { + RetryPolicy retryPolicy = new ExponentialBackoffRetry( + BASE_SLEEP_TIME_MS, MAX_SLEEP_MS, MAX_RETRIES); + IOException lastException = null; + while (retryPolicy.attempt()) { + try { + createOrRefreshOssStsClient(mOssConf); + lastException = null; + break; + } catch (IOException e) { + LOG.warn("init oss client failed! has retried {} times", retryPolicy.getAttemptCount(), e); + lastException = e; + } + } + if (lastException != null) { + LOG.error("init oss client failed.", lastException); + throw lastException; + } + } + + /** + * Create Or Refresh the STS OSS client. + * @param ossConfiguration OSS {@link UnderFileSystemConfiguration} + * @throws IOException if failed to create or refresh OSS client + */ + protected void createOrRefreshOssStsClient(UnderFileSystemConfiguration ossConfiguration) + throws IOException { + ClientBuilderConfiguration ossClientConf = + OSSUnderFileSystem.initializeOSSClientConfig(ossConfiguration); + doCreateOrRefreshStsOssClient(ossConfiguration, ossClientConf); + } + + boolean tokenWillExpiredAfter(long after) { + return mStsTokenExpiration - System.currentTimeMillis() <= after; + } + + private void doCreateOrRefreshStsOssClient( + UnderFileSystemConfiguration ossConfiguration, + ClientBuilderConfiguration clientConfiguration) throws IOException { + if (tokenWillExpiredAfter(mTokenTimeoutMs)) { + String ecsRamRole = ossConfiguration.getString(PropertyKey.UNDERFS_OSS_ECS_RAM_ROLE); + String fullECSMetaDataServiceUrl = mEcsMetadataServiceUrl + ecsRamRole; + String jsonStringResponse = HttpUtils.get(fullECSMetaDataServiceUrl, ECS_META_GET_TIMEOUT); + + JsonObject jsonObject = new Gson().fromJson(jsonStringResponse, JsonObject.class); + String accessKeyId = jsonObject.get(ACCESS_KEY_ID).getAsString(); + String accessKeySecret = jsonObject.get(ACCESS_KEY_SECRET).getAsString(); + String securityToken = 
jsonObject.get(SECURITY_TOKEN).getAsString(); + mStsTokenExpiration = + convertStringToDate(jsonObject.get(EXPIRATION).getAsString()).getTime(); + + if (null == mOssClient) { + mOssClient = mOssClientBuilder.build( + ossConfiguration.getString(PropertyKey.OSS_ENDPOINT_KEY), + accessKeyId, accessKeySecret, securityToken, + clientConfiguration); + } else { + mOssClient.switchCredentials((new DefaultCredentials( + accessKeyId, accessKeySecret, securityToken))); + } + LOG.debug("oss sts client create success, expiration = {}", mStsTokenExpiration); + } + } + + /** + * Returns the STS OSS client. + * @return oss client + */ + public OSS getOSSClient() { + return mOssClient; + } + + private Date convertStringToDate(String dateString) throws IOException { + TimeZone zeroTimeZone = TimeZone.getTimeZone("ETC/GMT-0"); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + sdf.setTimeZone(zeroTimeZone); + Date date = null; + try { + date = sdf.parse(dateString); + } catch (ParseException e) { + throw new IOException(String.format("failed to parse date: %s", dateString), e); + } + return date; + } + + @Override + public void close() throws IOException { + if (null != mRefreshOssClientScheduledThread) { + mRefreshOssClientScheduledThread.shutdown(); + } + if (null != mOssClient) { + mOssClient.shutdown(); + mOssClient = null; + } + } + + @VisibleForTesting + protected void setOssClientBuilder(OSSClientBuilder ossClientBuilder) { + mOssClientBuilder = ossClientBuilder; + } +} diff --git a/underfs/oss/src/test/java/alluxio/underfs/oss/StsOssClientProviderTest.java b/underfs/oss/src/test/java/alluxio/underfs/oss/StsOssClientProviderTest.java new file mode 100644 index 000000000000..e71884a41ccb --- /dev/null +++ b/underfs/oss/src/test/java/alluxio/underfs/oss/StsOssClientProviderTest.java @@ -0,0 +1,102 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.oss; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.util.network.HttpUtils; + +import com.aliyun.oss.OSSClient; +import com.aliyun.oss.OSSClientBuilder; +import org.junit.Before; +import org.junit.Test; +import org.mockito.MockedStatic; +import org.mockito.Mockito; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.TimeZone; + +public class StsOssClientProviderTest { + + InstancedConfiguration mConf; + private static final String ECS_RAM_ROLE = "snapshot-role-test"; + private String mEcsMetadataService; + public static final String MOCK_ECS_META_RESPONSE = "{\n" + + " 'AccessKeyId' : 'STS.mockAK',\n" + + " 'AccessKeySecret' : 'mockSK',\n" + + " 'Expiration' : '2018-04-23T09:45:05Z',\n" + + " 'SecurityToken' : 'mockSecurityToken',\n" + + " 'LastUpdated' : '2018-04-23T03:45:05Z',\n" + + " 'Code' : 'Success'\n" + + "}"; + + @Before + public void before() { + mConf = Configuration.copyGlobal(); + mEcsMetadataService = mConf.getString( + PropertyKey.UNDERFS_OSS_STS_ECS_METADATA_SERVICE_ENDPOINT) + ECS_RAM_ROLE; + } + + @Test + public void testInitAndRefresh() throws Exception { + String expiration = toUtcString(new Date(System.currentTimeMillis() + 6 * Constants.HOUR_MS)); + String lastUpdated = toUtcString(new Date(System.currentTimeMillis())); + + 
mConf.set(PropertyKey.OSS_ENDPOINT_KEY, "http://oss-cn-qingdao.aliyuncs.com"); + mConf.set(PropertyKey.UNDERFS_OSS_ECS_RAM_ROLE, ECS_RAM_ROLE); + final UnderFileSystemConfiguration ossConfiguration = + UnderFileSystemConfiguration.defaults(mConf); + + // init + OSSClientBuilder ossClientBuilder = Mockito.mock(OSSClientBuilder.class); + OSSClient ossClient = Mockito.mock(OSSClient.class); + Mockito.when(ossClientBuilder.build( + Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(ossClient); + try (MockedStatic mockedHttpUtils = Mockito.mockStatic(HttpUtils.class)) { + mockedHttpUtils.when(() -> HttpUtils.get(mEcsMetadataService, 10000)) + .thenReturn(MOCK_ECS_META_RESPONSE); + try (StsOssClientProvider clientProvider = new StsOssClientProvider(ossConfiguration)) { + clientProvider.setOssClientBuilder(ossClientBuilder); + clientProvider.init(); + // refresh + String responseBodyString = "{\n" + + " 'AccessKeyId' : 'STS.mockAK',\n" + + " 'AccessKeySecret' : 'mockSK',\n" + + " 'Expiration' : '" + expiration + "',\n" + + " 'SecurityToken' : 'mockSecurityToken',\n" + + " 'LastUpdated' : '" + lastUpdated + "',\n" + + " 'Code' : 'Success'\n" + + "}"; + mockedHttpUtils.when(() -> HttpUtils.get(mEcsMetadataService, 10000)) + .thenReturn(responseBodyString); + assertTrue(clientProvider.tokenWillExpiredAfter(0)); + clientProvider.createOrRefreshOssStsClient(ossConfiguration); + assertFalse(clientProvider.tokenWillExpiredAfter(0)); + } + } + } + + private String toUtcString(Date date) { + TimeZone zeroTimeZone = TimeZone.getTimeZone("ETC/GMT-0"); + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); + sdf.setTimeZone(zeroTimeZone); + return sdf.format(date); + } +} From 12ecbc80cecb70a3284d2949645235ab5864485c Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 6 Jan 2023 09:31:31 +0800 Subject: [PATCH 042/334] Fix concurrent sync dedup ### What changes are proposed in this pull request? 
The concurrent sync feature will throw exception if the locking scheme uses the constructor that takes a shouldSync boolean. This PR fixes it. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16717 change-id: cid-d1ab7d5aa81905bcb2d394b5e922a33cf747908b --- .../alluxio/master/file/InodeSyncStream.java | 8 +++++--- .../alluxio/master/file/meta/LockingScheme.java | 6 ++++++ ...eSystemMasterSyncMetadataConcurrentTest.java | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index a193863309c2..e28b9f9dd4d0 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -48,6 +48,7 @@ import alluxio.master.file.meta.LockingScheme; import alluxio.master.file.meta.MountTable; import alluxio.master.file.meta.MutableInodeFile; +import alluxio.master.file.meta.SyncCheck; import alluxio.master.file.meta.SyncCheck.SyncResult; import alluxio.master.file.meta.UfsAbsentPathCache; import alluxio.master.file.meta.UfsSyncPathCache; @@ -431,7 +432,7 @@ private SyncStatus syncInternal() throws DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); return SyncStatus.NOT_NEEDED; } - if (mDedupConcurrentSync) { + if (mDedupConcurrentSync && mRootScheme.shouldSync() != SyncCheck.SHOULD_SYNC) { /* * If a concurrent sync on the same path is successful after this sync had already * been initialized and that sync is successful, then there is no need to sync again. @@ -452,9 +453,10 @@ private SyncStatus syncInternal() throws * Note that this still applies if A is to sync recursively path /aaa while B is to * sync path /aaa/bbb as the sync scope of A covers B's. 
*/ - boolean shouldSync = mUfsSyncPathCache.shouldSyncPath(mRootScheme.getPath(), mSyncInterval, + boolean shouldSkipSync = + mUfsSyncPathCache.shouldSyncPath(mRootScheme.getPath(), mSyncInterval, mDescendantType).getLastSyncTime() > mRootScheme.shouldSync().getLastSyncTime(); - if (shouldSync) { + if (shouldSkipSync) { DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); LOG.debug("Skipped sync on {} due to successful concurrent sync", mRootScheme.getPath()); return SyncStatus.NOT_NEEDED; diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java b/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java index 07f64136803d..8cacbe364cf6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/LockingScheme.java @@ -32,13 +32,19 @@ public final class LockingScheme { private final LockPattern mDesiredLockPattern; private final SyncCheck mShouldSync; + // CHECKSTYLE.OFF: LineLengthExceed - cannot break the method link /** * Constructs a {@link LockingScheme}. * + * Avoid using this constructor where shouldSync is set true, if possible. + * {@link #LockingScheme(AlluxioURI, LockPattern, FileSystemMasterCommonPOptions, UfsSyncPathCache, DescendantType)} + * is the preferred one in such case, to make the metadata sync dedup feature work. 
+ * * @param path the path to lock * @param desiredLockPattern the desired lock mode * @param shouldSync true if the path should be synced */ + // CHECKSTYLE.ON: LineLengthExceed public LockingScheme(AlluxioURI path, LockPattern desiredLockPattern, boolean shouldSync) { mPath = path; mDesiredLockPattern = desiredLockPattern; diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java index 1ea48f3bffbe..824f65184c92 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataConcurrentTest.java @@ -162,6 +162,23 @@ public void syncTheSameDirectoryButTheSecondCallCancelled() throws Exception { assertEquals(InodeSyncStream.SyncStatus.OK, iss3.sync()); } + @Test + public void syncWhenShouldSyncIsSetTrue() throws Exception { + Supplier inodeSyncStreamSupplier = () -> new InodeSyncStream( + new LockingScheme( + new AlluxioURI("/"), InodeTree.LockPattern.READ, true), + mFileSystemMaster, mFileSystemMaster.getSyncPathCache(), + RpcContext.NOOP, DescendantType.ALL, FileSystemMasterCommonPOptions.getDefaultInstance(), + false, + false, + false); + + InodeSyncStream iss1 = inodeSyncStreamSupplier.get(); + InodeSyncStream iss2 = inodeSyncStreamSupplier.get(); + assertSyncHappenTwice(syncConcurrent(iss1, iss2)); + assertSyncHappenTwice(syncSequential(inodeSyncStreamSupplier, inodeSyncStreamSupplier)); + } + private void assertTheSecondSyncSkipped( Pair results) { assertEquals(InodeSyncStream.SyncStatus.OK, results.getFirst()); From bd74a890abd3d2fcff18e86983835d51143df712 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Fri, 6 Jan 2023 14:01:06 +0800 Subject: [PATCH 043/334] Make client send version to server and audit log contain version ### What changes are proposed in this pull 
request? 1. The client will use an injector to send version and revision to the server 2. Displaying client version in audit log ### Why are the changes needed? This allows us to know the version of the client connected to alluxio. ### Does this PR introduce any user facing changes? Display client version information in the audit log like ``` 2022-11-30 18:08:57,038 INFO AUDIT_LOG: succeeded=true allowed=true ugi=xxx,xxx(AUTH=SIMPLE) ip=/xxx:52912 cmd=mkdirs src=xxx dst=null perm=xxx:xxx:rwxrwxrwx executionTimeUs=160770 clientVersion=xxx proto=rpc ``` pr-link: Alluxio/alluxio#16582 change-id: cid-b08f7f676e36f777c2606bf3fd27deb93f3e0d50 --- .../main/java/alluxio/RuntimeConstants.java | 4 ++ .../main/java/alluxio/conf/PropertyKey.java | 11 ++++ .../grpc/ClientVersionClientInjector.java | 51 +++++++++++++++++++ .../java/alluxio/grpc/GrpcChannelBuilder.java | 3 ++ ....java => ClientContextServerInjector.java} | 38 +++++++++++--- .../src/main/java/alluxio/RpcUtils.java | 6 ++- .../raft/SnapshotReplicationManager.java | 7 +-- .../master/transport/GrpcMessagingServer.java | 4 +- .../GrpcMessagingServiceClientHandler.java | 4 +- .../master/backup/BackupLeaderRole.java | 4 +- .../master/block/DefaultBlockMaster.java | 6 +-- .../master/file/DefaultFileSystemMaster.java | 11 ++-- .../file/FileSystemMasterAuditContext.java | 29 +++++++++-- .../master/journal/DefaultJournalMaster.java | 4 +- .../master/meta/DefaultMetaMaster.java | 8 +-- .../master/metrics/DefaultMetricsMaster.java | 4 +- .../java/alluxio/master/job/JobMaster.java | 8 +-- .../master/job/JobMasterAuditContext.java | 25 +++++++-- .../master/table/DefaultTableMaster.java | 4 +- 19 files changed, 186 insertions(+), 45 deletions(-) create mode 100644 core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java rename core/common/src/main/java/alluxio/security/authentication/{ClientIpAddressInjector.java => ClientContextServerInjector.java} (65%) diff --git 
a/core/common/src/main/java/alluxio/RuntimeConstants.java b/core/common/src/main/java/alluxio/RuntimeConstants.java index 19e1fccf2ae3..8480b95cd0e8 100644 --- a/core/common/src/main/java/alluxio/RuntimeConstants.java +++ b/core/common/src/main/java/alluxio/RuntimeConstants.java @@ -36,6 +36,10 @@ public final class RuntimeConstants { } } + public static final String REVISION_SHORT = ProjectConstants.REVISION.substring(0, 8); + public static final String VERSION_AND_REVISION_SHORT = + VERSION + "-" + REVISION_SHORT; + /** The relative path to the Alluxio target jar. */ public static final String ALLUXIO_JAR = "target/alluxio-" + VERSION + "-jar-with-dependencies.jar"; diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index d0343cab711a..de45b2bfde9a 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5863,6 +5863,15 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + + public static final PropertyKey USER_CLIENT_REPORT_VERSION_ENABLED = + booleanBuilder(Name.USER_CLIENT_REPORT_VERSION_ENABLED) + .setDefaultValue(false) + .setDescription("Whether the client reports version information to the server.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_FILE_WRITE_TYPE_DEFAULT = enumBuilder(Name.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.class) .setDefaultValue(WriteType.ASYNC_THROUGH) @@ -8369,6 +8378,8 @@ public static final class Name { "alluxio.user.client.cache.timeout.duration"; public static final String USER_CLIENT_CACHE_TIMEOUT_THREADS = "alluxio.user.client.cache.timeout.threads"; + public static final String USER_CLIENT_REPORT_VERSION_ENABLED = + "alluxio.user.client.report.version.enabled"; public static final String USER_CONF_CLUSTER_DEFAULT_ENABLED = 
"alluxio.user.conf.cluster.default.enabled"; public static final String USER_CONF_SYNC_INTERVAL = "alluxio.user.conf.sync.interval"; diff --git a/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java b/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java new file mode 100644 index 000000000000..cb059bc84551 --- /dev/null +++ b/core/common/src/main/java/alluxio/grpc/ClientVersionClientInjector.java @@ -0,0 +1,51 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.grpc; + +import alluxio.RuntimeConstants; + +import io.grpc.CallOptions; +import io.grpc.Channel; +import io.grpc.ClientCall; +import io.grpc.ClientInterceptor; +import io.grpc.ForwardingClientCall; +import io.grpc.ForwardingClientCallListener; +import io.grpc.Metadata; +import io.grpc.MethodDescriptor; + +/** + * Client side interceptor that is used to set the request header for the client version. + */ +public class ClientVersionClientInjector implements ClientInterceptor { + public static final Metadata.Key S_CLIENT_VERSION_KEY = + Metadata.Key.of("alluxio-version", Metadata.ASCII_STRING_MARSHALLER); + + @Override + public ClientCall interceptCall(MethodDescriptor method, + CallOptions callOptions, Channel next) { + return new ForwardingClientCall.SimpleForwardingClientCall( + next.newCall(method, callOptions)) { + @Override + public void start(Listener responseListener, Metadata headers) { + // Put version to headers. 
+ headers.put(S_CLIENT_VERSION_KEY, RuntimeConstants.VERSION_AND_REVISION_SHORT); + super.start(new ForwardingClientCallListener.SimpleForwardingClientCallListener( + responseListener) { + @Override + public void onHeaders(Metadata headers) { + super.onHeaders(headers); + } + }, headers); + } + }; + } +} diff --git a/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java b/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java index 709f38188ad2..c63b0fd93c94 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcChannelBuilder.java @@ -104,6 +104,9 @@ public GrpcChannel build() throws AlluxioStatusException { } throw AlluxioStatusException.fromThrowable(t); } + if (mConfiguration.getBoolean(PropertyKey.USER_CLIENT_REPORT_VERSION_ENABLED)) { + channel.intercept(new ClientVersionClientInjector()); + } return channel; } } diff --git a/core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java b/core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java similarity index 65% rename from core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java rename to core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java index 07ff7374ef7c..ab9de9ebed2f 100644 --- a/core/common/src/main/java/alluxio/security/authentication/ClientIpAddressInjector.java +++ b/core/common/src/main/java/alluxio/security/authentication/ClientContextServerInjector.java @@ -11,6 +11,8 @@ package alluxio.security.authentication; +import alluxio.grpc.ClientVersionClientInjector; + import io.grpc.ForwardingServerCallListener; import io.grpc.Grpc; import io.grpc.Metadata; @@ -18,32 +20,50 @@ import io.grpc.ServerCallHandler; import io.grpc.ServerInterceptor; +import javax.annotation.Nullable; + /** * Server side interceptor that is used to put remote client's IP Address to thread local storage. 
*/ -public class ClientIpAddressInjector implements ServerInterceptor { +public class ClientContextServerInjector implements ServerInterceptor { /** * A {@link ThreadLocal} variable to maintain the client's IP address along with a specific * thread. */ - private static ThreadLocal sIpAddressThreadLocal = new ThreadLocal<>(); + private static final ThreadLocal IP_ADDRESS_THREAD_LOCAL = new ThreadLocal<>(); + /** + * A {@link ThreadLocal} variable to maintain the client's version along with a specific + * thread. + */ + private static final ThreadLocal CLIENT_VERSION_THREAD_LOCAL = + new ThreadLocal<>(); /** * @return IP address of the gRPC client that is making the call */ + @Nullable public static String getIpAddress() { - return sIpAddressThreadLocal.get(); + return IP_ADDRESS_THREAD_LOCAL.get(); + } + + /** + * @return the client version + */ + @Nullable + public static String getClientVersion() { + return CLIENT_VERSION_THREAD_LOCAL.get(); } @Override public ServerCall.Listener interceptCall(ServerCall call, Metadata headers, ServerCallHandler next) { /** - * For streaming calls, below will make sure remote IP address is injected prior to creating the - * stream. + * For streaming calls, below will make sure remote IP address and client version are + * injected prior to creating the stream. 
*/ setRemoteIpAddress(call); + setClientVersion(headers); /** * For non-streaming calls to server, below listener will be invoked in the same thread that is @@ -54,6 +74,7 @@ public ServerCall.Listener interceptCall(ServerCall void setRemoteIpAddress(ServerCall call) { String remoteIpAddress = call.getAttributes().get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR).toString(); - sIpAddressThreadLocal.set(remoteIpAddress); + IP_ADDRESS_THREAD_LOCAL.set(remoteIpAddress); + } + + private void setClientVersion(Metadata headers) { + String version = headers.get(ClientVersionClientInjector.S_CLIENT_VERSION_KEY); + CLIENT_VERSION_THREAD_LOCAL.set(version); } } diff --git a/core/server/common/src/main/java/alluxio/RpcUtils.java b/core/server/common/src/main/java/alluxio/RpcUtils.java index 1a8f77d4493e..e77a16d9e638 100644 --- a/core/server/common/src/main/java/alluxio/RpcUtils.java +++ b/core/server/common/src/main/java/alluxio/RpcUtils.java @@ -21,7 +21,7 @@ import alluxio.metrics.MetricsSystem; import alluxio.security.User; import alluxio.security.authentication.AuthenticatedClientUser; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import com.codahale.metrics.Timer; import io.grpc.StatusException; @@ -122,7 +122,9 @@ public static T callAndReturn(Logger logger, RpcCallableThrowsIOException MetricsSystem.timer(MetricKey.MASTER_TOTAL_RPCS.getName()), MetricsSystem.timer(getQualifiedMetricName(methodName)))) { MetricsSystem.counter(getQualifiedInProgressMetricName(methodName)).inc(); - logger.debug("Enter: {} from {}: {}", methodName, ClientIpAddressInjector.getIpAddress(), + logger.debug("Enter: {} from {}: {} client version: {}", methodName, + ClientContextServerInjector.getIpAddress(), + ClientContextServerInjector.getClientVersion(), debugDesc); T res = callable.call(); logger.debug("Exit: {}: {}", methodName, debugDesc); diff --git 
a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java index d999d0494024..feeb17dcec70 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java @@ -35,7 +35,7 @@ import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.resource.LockResource; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.FormatUtils; import alluxio.util.LogUtils; import alluxio.util.logging.SamplingLogger; @@ -293,7 +293,7 @@ public long maybeCopySnapshotFromFollower() { */ public StreamObserver receiveSnapshotFromFollower( StreamObserver responseStreamObserver) { - String followerIp = ClientIpAddressInjector.getIpAddress(); + String followerIp = ClientContextServerInjector.getIpAddress(); LOG.info("Received upload snapshot request from follower {}", followerIp); SnapshotDownloader observer = @@ -389,7 +389,8 @@ public Message handleRequest(JournalQueryRequest queryRequest) throws IOExceptio public StreamObserver sendSnapshotToFollower( StreamObserver responseObserver) { SnapshotInfo snapshot = mStorage.getLatestSnapshot(); - LOG.debug("Received snapshot download request from {}", ClientIpAddressInjector.getIpAddress()); + LOG.debug("Received snapshot download request from {}", + ClientContextServerInjector.getIpAddress()); SnapshotUploader requestStreamObserver = SnapshotUploader.forLeader(mStorage, snapshot, responseObserver); if (snapshot == null) { diff --git a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java index 3dfc965a26f2..35d0526ce6a1 100644 --- 
a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java +++ b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServer.java @@ -16,7 +16,7 @@ import alluxio.grpc.GrpcServerAddress; import alluxio.grpc.GrpcServerBuilder; import alluxio.grpc.GrpcService; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.security.user.UserState; import io.grpc.ServerInterceptors; @@ -105,7 +105,7 @@ public synchronized CompletableFuture listen(InetSocketAddress address, .addService(new GrpcService(ServerInterceptors.intercept( new GrpcMessagingServiceClientHandler(address, listener::accept, threadContext, mExecutor, mConf.getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT)), - new ClientIpAddressInjector()))) + new ClientContextServerInjector()))) .build(); try { diff --git a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java index 108f984cb609..62df849e40a9 100644 --- a/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java +++ b/core/server/common/src/main/java/alluxio/master/transport/GrpcMessagingServiceClientHandler.java @@ -13,7 +13,7 @@ import alluxio.grpc.MessagingServiceGrpc; import alluxio.grpc.TransportMessage; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import com.google.common.base.MoreObjects; import io.grpc.stub.StreamObserver; @@ -74,7 +74,7 @@ public StreamObserver connect( // Transport level identifier for this connection. 
String transportId = MoreObjects.toStringHelper(this) .add("ServerAddress", mServerAddress) - .add("ClientAddress", ClientIpAddressInjector.getIpAddress()) + .add("ClientAddress", ClientContextServerInjector.getIpAddress()) .toString(); LOG.debug("Creating a messaging server connection: {}", transportId); diff --git a/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java b/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java index 80e9c7590171..a09577ff9113 100644 --- a/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java +++ b/core/server/master/src/main/java/alluxio/master/backup/BackupLeaderRole.java @@ -30,7 +30,7 @@ import alluxio.master.transport.GrpcMessagingConnection; import alluxio.master.transport.GrpcMessagingServiceClientHandler; import alluxio.resource.LockResource; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.ConfigurationUtils; import alluxio.util.network.NetworkAddressUtils; import alluxio.wire.BackupStatus; @@ -144,7 +144,7 @@ public Map getRoleServices() { Configuration.global()), (conn) -> activateWorkerConnection(conn), mGrpcMessagingContext, mExecutorService, mCatalystRequestTimeout), - new ClientIpAddressInjector())).withCloseable(this)); + new ClientContextServerInjector())).withCloseable(this)); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index bec37bbcb37d..8023449789b9 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -63,7 +63,7 @@ import alluxio.proto.meta.Block.BlockMeta; import alluxio.resource.CloseableIterator; import alluxio.resource.LockResource; -import 
alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.CommonUtils; import alluxio.util.IdUtils; import alluxio.util.ThreadFactoryUtils; @@ -341,11 +341,11 @@ public Map getServices() { services.put(ServiceType.BLOCK_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors .intercept(new BlockMasterClientServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE, new GrpcService(ServerInterceptors .intercept(new BlockMasterWorkerServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index f7644243f0dd..7cb55db0af8b 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -144,7 +144,7 @@ import alluxio.retry.RetryPolicy; import alluxio.security.authentication.AuthType; import alluxio.security.authentication.AuthenticatedClientUser; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.AclEntryType; import alluxio.security.authorization.Mode; @@ -562,13 +562,13 @@ public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors .intercept(new FileSystemMasterClientServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, new GrpcService(ServerInterceptors .intercept(new 
FileSystemMasterJobServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); services.put(ServiceType.FILE_SYSTEM_MASTER_WORKER_SERVICE, new GrpcService(ServerInterceptors .intercept(new FileSystemMasterWorkerServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); return services; } @@ -5224,7 +5224,8 @@ private FileSystemMasterAuditContext createAuditContext(String command, AlluxioU Configuration.getEnum(PropertyKey.SECURITY_AUTHENTICATION_TYPE, AuthType.class); auditContext.setUgi(ugi) .setAuthType(authType) - .setIp(ClientIpAddressInjector.getIpAddress()) + .setIp(ClientContextServerInjector.getIpAddress()) + .setClientVersion(ClientContextServerInjector.getClientVersion()) .setCommand(command).setSrcPath(srcPath).setDstPath(dstPath) .setSrcInode(srcInode).setAllowed(true) .setCreationTimeNs(System.nanoTime()); diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java index 45e2f0e0f24f..ce440da857dc 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterAuditContext.java @@ -12,6 +12,8 @@ package alluxio.master.file; import alluxio.AlluxioURI; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.audit.AsyncUserAccessAuditLogWriter; import alluxio.master.audit.AuditContext; import alluxio.master.file.meta.Inode; @@ -37,6 +39,7 @@ public final class FileSystemMasterAuditContext implements AuditContext { private Inode mSrcInode; private long mCreationTimeNs; private long mExecutionTimeNs; + private String mClientVersion; @Override public FileSystemMasterAuditContext setAllowed(boolean allowed) { @@ -139,6 +142,17 @@ public FileSystemMasterAuditContext setCreationTimeNs(long creationTimeNs) { return 
this; } + /** + * set client version. + * + * @param version client version + * @return this {@link AuditContext} instance + */ + public FileSystemMasterAuditContext setClientVersion(String version) { + mClientVersion = version; + return this; + } + /** * Constructor of {@link FileSystemMasterAuditContext}. * @@ -160,21 +174,28 @@ public void close() { @Override public String toString() { + StringBuilder auditLog = new StringBuilder(); if (mSrcInode != null) { short mode = mSrcInode.getMode(); - return String.format( + auditLog.append(String.format( "succeeded=%b\tallowed=%b\tugi=%s (AUTH=%s)\tip=%s\tcmd=%s\tsrc=%s\tdst=%s\t" + "perm=%s:%s:%s%s%s\texecutionTimeUs=%d", mSucceeded, mAllowed, mUgi, mAuthType, mIp, mCommand, mSrcPath, mDstPath, mSrcInode.getOwner(), mSrcInode.getGroup(), Mode.extractOwnerBits(mode), Mode.extractGroupBits(mode), Mode.extractOtherBits(mode), - mExecutionTimeNs / 1000); + mExecutionTimeNs / 1000)); } else { - return String.format( + auditLog.append(String.format( "succeeded=%b\tallowed=%b\tugi=%s (AUTH=%s)\tip=%s\tcmd=%s\tsrc=%s\tdst=%s\t" + "perm=null\texecutionTimeUs=%d", mSucceeded, mAllowed, mUgi, mAuthType, mIp, mCommand, mSrcPath, mDstPath, - mExecutionTimeNs / 1000); + mExecutionTimeNs / 1000)); + } + if (Configuration.global().getBoolean(PropertyKey.USER_CLIENT_REPORT_VERSION_ENABLED)) { + auditLog.append( + String.format("\tclientVersion=%s\t", mClientVersion)); } + auditLog.append("\tproto=rpc"); + return auditLog.toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java index 09d81fdaf570..eae8d452c37f 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java @@ -24,7 +24,7 @@ import alluxio.master.MasterContext; import alluxio.master.PrimarySelector; import 
alluxio.master.journal.raft.RaftJournalSystem; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.executor.ExecutorServiceFactories; import io.grpc.ServerInterceptors; @@ -116,7 +116,7 @@ public Map getServices() { services.put(alluxio.grpc.ServiceType.JOURNAL_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors.intercept( new JournalMasterClientServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); return services; } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 6d806a44f091..f4901f649343 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -52,7 +52,7 @@ import alluxio.proto.journal.Journal; import alluxio.proto.journal.Meta; import alluxio.resource.CloseableIterator; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.underfs.UfsManager; import alluxio.util.ConfigurationUtils; import alluxio.util.IdUtils; @@ -260,15 +260,15 @@ public Map getServices() { services.put(ServiceType.META_MASTER_CONFIG_SERVICE, new GrpcService(ServerInterceptors.intercept( new MetaMasterConfigurationServiceHandler(this), - new ClientIpAddressInjector())).disableAuthentication()); + new ClientContextServerInjector())).disableAuthentication()); services.put(ServiceType.META_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors.intercept( new MetaMasterClientServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); services.put(ServiceType.META_MASTER_MASTER_SERVICE, new GrpcService(ServerInterceptors.intercept( new MetaMasterMasterServiceHandler(this), - 
new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); // Add backup role services. services.putAll(mBackupRole.getRoleServices()); services.putAll(mJournalSystem.getJournalServices()); diff --git a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java index b302ecb81638..c7af089dc355 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java @@ -30,7 +30,7 @@ import alluxio.metrics.MetricsSystem; import alluxio.metrics.MultiValueMetricsAggregator; import alluxio.metrics.aggregator.SingleTagValueAggregator; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; @@ -168,7 +168,7 @@ public Map getServices() { services.put(ServiceType.METRICS_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors.intercept( getMasterServiceHandler(), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); return services; } diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index dac1fbf2a884..b19bed025b79 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -60,7 +60,7 @@ import alluxio.resource.LockResource; import alluxio.security.authentication.AuthType; import alluxio.security.authentication.AuthenticatedClientUser; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.underfs.UfsManager; import alluxio.util.CommonUtils; import 
alluxio.util.executor.ExecutorServiceFactories; @@ -222,7 +222,8 @@ public Map getServices() { Map services = Maps.newHashMap(); services.put(ServiceType.JOB_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors - .intercept(new JobMasterClientServiceHandler(this), new ClientIpAddressInjector()))); + .intercept(new JobMasterClientServiceHandler(this), + new ClientContextServerInjector()))); services.put(ServiceType.JOB_MASTER_WORKER_SERVICE, new GrpcService(new JobMasterWorkerServiceHandler(this))); return services; @@ -669,7 +670,8 @@ private JobMasterAuditContext createAuditContext(String command) { Configuration.getEnum(PropertyKey.SECURITY_AUTHENTICATION_TYPE, AuthType.class); auditContext.setUgi(ugi) .setAuthType(authType) - .setIp(ClientIpAddressInjector.getIpAddress()) + .setIp(ClientContextServerInjector.getIpAddress()) + .setClientVersion(ClientContextServerInjector.getClientVersion()) .setCommand(command) .setAllowed(true) .setCreationTimeNs(System.nanoTime()); diff --git a/job/server/src/main/java/alluxio/master/job/JobMasterAuditContext.java b/job/server/src/main/java/alluxio/master/job/JobMasterAuditContext.java index 2760338ed870..c3940b088a1e 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMasterAuditContext.java +++ b/job/server/src/main/java/alluxio/master/job/JobMasterAuditContext.java @@ -11,6 +11,8 @@ package alluxio.master.job; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.audit.AsyncUserAccessAuditLogWriter; import alluxio.master.audit.AuditContext; import alluxio.security.authentication.AuthType; @@ -33,6 +35,7 @@ public class JobMasterAuditContext implements AuditContext { private String mJobName; private long mCreationTimeNs; private long mExecutionTimeNs; + private String mClientVersion; @Override public JobMasterAuditContext setAllowed(boolean allowed) { @@ -124,10 +127,21 @@ public JobMasterAuditContext setJobName(String jobName) { return this; } + /** + * Sets client version. 
+ * + * @param clientVersion the client version + * @return this {@link AuditContext} instance + */ + public JobMasterAuditContext setClientVersion(String clientVersion) { + mClientVersion = clientVersion; + return this; + } + /** * Constructor of {@link JobMasterAuditContext}. * - * @param asyncAuditLogWriter + * @param asyncAuditLogWriter async audit log writer */ protected JobMasterAuditContext(AsyncUserAccessAuditLogWriter asyncAuditLogWriter) { mAsyncAuditLogWriter = asyncAuditLogWriter; @@ -145,10 +159,15 @@ public void close() { @Override public String toString() { - return String.format( + StringBuilder auditLog = new StringBuilder(); + auditLog.append(String.format( "succeeded=%b\tallowed=%b\tugi=%s (AUTH=%s)\tip=%s\tcmd=%s\tjobId=%d\tjobName=%s\t" + "perm=null\texecutionTimeUs=%d", mSucceeded, mAllowed, mUgi, mAuthType, mIp, mCommand, mJobId, mJobName, - mExecutionTimeNs / 1000); + mExecutionTimeNs / 1000)); + if (Configuration.global().getBoolean(PropertyKey.USER_CLIENT_REPORT_VERSION_ENABLED)) { + auditLog.append(String.format("\tclientVersion=%s\t", mClientVersion)); + } + return auditLog.toString(); } } diff --git a/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java b/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java index 503c8fb9432f..f3267710144f 100644 --- a/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java +++ b/table/server/master/src/main/java/alluxio/master/table/DefaultTableMaster.java @@ -33,7 +33,7 @@ import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.table.transform.TransformJobInfo; import alluxio.master.table.transform.TransformManager; -import alluxio.security.authentication.ClientIpAddressInjector; +import alluxio.security.authentication.ClientContextServerInjector; import alluxio.table.common.transform.TransformDefinition; import alluxio.util.executor.ExecutorServiceFactories; @@ -181,7 +181,7 @@ public Map getServices() { 
services.put(ServiceType.TABLE_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors.intercept( new TableMasterClientServiceHandler(this), - new ClientIpAddressInjector()))); + new ClientContextServerInjector()))); return services; } From b47fbda023be925e4c54c651ddf866e1de3b9e20 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Fri, 6 Jan 2023 13:21:52 -0800 Subject: [PATCH 044/334] Fix journal ports in use for unit tests Fixes https://github.com/Alluxio/alluxio/issues/16722 This is done by assigning at open port for the journal for each test instead of using static ports. pr-link: Alluxio/alluxio#16720 change-id: cid-257f74103d11213529fa2f63530662059f1bad6e --- .../master/journal/JournalContextTest.java | 2 ++ .../master/journal/JournalTestUtils.java | 22 +++++++++++++++ .../raft/RaftJournalSystemMetricsTest.java | 27 +++++++++++-------- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java b/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java index a89af6602283..9d943b990e31 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/JournalContextTest.java @@ -11,6 +11,7 @@ package alluxio.master.journal; +import static alluxio.master.journal.JournalTestUtils.createEmbeddedJournalTestPorts; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -88,6 +89,7 @@ public JournalContextTest(String journalType) { @Before public void before() throws Exception { Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, mJournalType); + createEmbeddedJournalTestPorts(1); mRegistry = new MasterRegistry(); mJournalSystem = JournalTestUtils.createJournalSystem(mTemporaryFolder); diff --git a/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java 
b/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java index 8cadd7e84a7d..a4f29bd7f8da 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/journal/JournalTestUtils.java @@ -11,6 +11,9 @@ package alluxio.master.journal; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.PortRegistry; import alluxio.util.CommonUtils.ProcessType; import org.junit.rules.TemporaryFolder; @@ -18,12 +21,31 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; /** * Utility methods for testing against a journal system. */ public class JournalTestUtils { + public static List createEmbeddedJournalTestPorts(int count) throws IOException { + List ports = new ArrayList<>(); + StringBuilder addresses = new StringBuilder(); + for (int i = 0; i < count; i++) { + if (i != 0) { + addresses.append(","); + } + int port = PortRegistry.getFreePort(); + ports.add(port); + addresses.append(String.format("localhost:%d", port)); + } + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_ADDRESSES, addresses.toString()); + Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_PORT, ports.get(0)); + return ports; + } + public static JournalSystem createJournalSystem(TemporaryFolder folder) { try { return createJournalSystem(folder.newFolder("journal").getAbsolutePath()); diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java index 4d89cf0c54b5..35323c1c564c 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java +++ 
b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java @@ -11,12 +11,12 @@ package alluxio.master.journal.raft; +import static alluxio.master.journal.JournalTestUtils.createEmbeddedJournalTestPorts; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.util.network.NetworkAddressUtils.ServiceType; @@ -30,6 +30,7 @@ import org.mockito.Mockito; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -45,12 +46,17 @@ public void after() { Configuration.reloadProperties(); } + private void resetMetrics() { + MetricsSystem.resetAllMetrics(); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.CLUSTER_LEADER_INDEX.getName()); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.MASTER_ROLE_ID.getName()); + MetricsSystem.METRIC_REGISTRY.remove(MetricKey.CLUSTER_LEADER_ID.getName()); + } + @Test public void journalStateMachineMetrics() throws Exception { - Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_ADDRESSES, - "localhost:29200,localhost:29201,localhost:29202"); - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_PORT, 29200); + resetMetrics(); + createEmbeddedJournalTestPorts(3); RaftJournalSystem system = new RaftJournalSystem(mFolder.newFolder().toURI(), ServiceType.MASTER_RAFT); String[] metricsNames = new String[] { @@ -80,10 +86,9 @@ public void journalStateMachineMetrics() throws Exception { @Test public void metrics() throws Exception { - Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_ADDRESSES, - "localhost:19200,localhost:19201,localhost:19202"); - Configuration.set(PropertyKey.MASTER_HOSTNAME, "localhost"); - Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_PORT, 19200); + 
resetMetrics(); + List ports = createEmbeddedJournalTestPorts(3); + RaftJournalSystem raftJournalSystem = new RaftJournalSystem(mFolder.newFolder().toURI(), ServiceType.MASTER_RAFT); RaftJournalSystem system = Mockito.spy(raftJournalSystem); @@ -94,7 +99,7 @@ public void metrics() throws Exception { .setFollowerInfo(RaftProtos.FollowerInfoProto.newBuilder() .setLeaderInfo(RaftProtos.ServerRpcProto.newBuilder() .setId(RaftProtos.RaftPeerProto.newBuilder() - .setId(ByteString.copyFromUtf8("localhost_19201"))))) + .setId(ByteString.copyFromUtf8(String.format("localhost_%d", ports.get(1))))))) .build(); Map sn1 = new HashMap() { @@ -136,7 +141,7 @@ public void metrics() throws Exception { Mockito.doReturn(followerInfo).when(system).getRaftRoleInfo(); assertEquals(1, getClusterLeaderIndex()); assertEquals(RaftProtos.RaftPeerRole.FOLLOWER_VALUE, getMasterRoleId()); - assertEquals("localhost_19201", getClusterLeaderId()); + assertEquals(String.format("localhost_%d", ports.get(1)), getClusterLeaderId()); assertEquals(sn3, getMasterJournalSequenceNumbers(system)); Map sn4 = new HashMap() { From 0f4e5978d3267d5fa6f81a07396477202a347417 Mon Sep 17 00:00:00 2001 From: bingzheng Date: Sat, 7 Jan 2023 16:45:29 +0800 Subject: [PATCH 045/334] Support monitor helm chart ### What changes are proposed in this pull request? Add a monitor helm chart to monitor alluxio cluster in kubernetes, view the README.md for detail information. ### Why are the changes needed? Easy to deploy a monitor system (grafana + prometheus) in kubernetes. ### Does this PR introduce any user facing changes? 
We can view cluster information by using grafana page just like this: ![image](https://user-images.githubusercontent.com/42070967/176720081-3d80c753-d561-4121-9662-067364d4240c.png) ![image](https://user-images.githubusercontent.com/42070967/176720493-177554dd-62ba-429e-9d3d-7cd7ebd291ca.png) ![image](https://user-images.githubusercontent.com/42070967/176720531-e75bdc87-28cd-42e6-971a-fbe9b4713651.png) pr-link: Alluxio/alluxio#15803 change-id: cid-ad53631d4e20c62b1e040ae70229b75467a1851e --- .../kubernetes/helm-chart/alluxio/values.yaml | 2 + .../kubernetes/helm-chart/monitor/.helmignore | 21 +++ .../helm-chart/monitor/CHANGELOG.md | 4 + .../kubernetes/helm-chart/monitor/Chart.yaml | 16 +++ .../kubernetes/helm-chart/monitor/README.md | 123 ++++++++++++++++ .../source/grafana/dashboard/dashboard.yaml | 34 +++++ .../monitor/source/grafana/datasource.yaml | 67 +++++++++ .../monitor/source/prometheus/prometheus.yaml | 131 ++++++++++++++++++ .../helm-chart/monitor/templates/_helpers.tpl | 84 +++++++++++ .../templates/config/grafana-conf.yaml | 37 +++++ .../templates/config/prometheus-conf.yaml | 26 ++++ .../monitor/templates/grafana/deployment.yaml | 83 +++++++++++ .../templates/prometheus/deployment.yaml | 84 +++++++++++ .../monitor/templates/prometheus/rbac.yaml | 60 ++++++++ .../monitor/templates/prometheus/service.yaml | 34 +++++ .../kubernetes/helm-chart/monitor/values.yaml | 82 +++++++++++ 16 files changed, 888 insertions(+) create mode 100644 integration/kubernetes/helm-chart/monitor/.helmignore create mode 100644 integration/kubernetes/helm-chart/monitor/CHANGELOG.md create mode 100644 integration/kubernetes/helm-chart/monitor/Chart.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/README.md create mode 100644 integration/kubernetes/helm-chart/monitor/source/grafana/dashboard/dashboard.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/source/grafana/datasource.yaml create mode 100644 
integration/kubernetes/helm-chart/monitor/source/prometheus/prometheus.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/_helpers.tpl create mode 100644 integration/kubernetes/helm-chart/monitor/templates/config/grafana-conf.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/config/prometheus-conf.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/grafana/deployment.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/prometheus/deployment.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/prometheus/rbac.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/templates/prometheus/service.yaml create mode 100644 integration/kubernetes/helm-chart/monitor/values.yaml diff --git a/integration/kubernetes/helm-chart/alluxio/values.yaml b/integration/kubernetes/helm-chart/alluxio/values.yaml index ce3f7353790e..000120b2df78 100644 --- a/integration/kubernetes/helm-chart/alluxio/values.yaml +++ b/integration/kubernetes/helm-chart/alluxio/values.yaml @@ -588,6 +588,8 @@ metrics: # podAnnotations: # prometheus.io/scrape: "true" # prometheus.io/port: "19999" + # prometheus.io/jobPort: "20002" + # prometheus.io/workerPort: "30000" # prometheus.io/path: "/metrics/prometheus/" podAnnotations: {} diff --git a/integration/kubernetes/helm-chart/monitor/.helmignore b/integration/kubernetes/helm-chart/monitor/.helmignore new file mode 100644 index 000000000000..f0c131944441 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/integration/kubernetes/helm-chart/monitor/CHANGELOG.md b/integration/kubernetes/helm-chart/monitor/CHANGELOG.md new file mode 100644 index 000000000000..b8467c9713a9 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/CHANGELOG.md @@ -0,0 +1,4 @@ +0.1.0 + +- Init support +- Modularized the directory structure diff --git a/integration/kubernetes/helm-chart/monitor/Chart.yaml b/integration/kubernetes/helm-chart/monitor/Chart.yaml new file mode 100644 index 000000000000..12918cb66d64 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/Chart.yaml @@ -0,0 +1,16 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +name: monitor +apiVersion: v1 +description: Use prometheus and grafana to monitor alluxio cluster. +version: 0.1.0 + diff --git a/integration/kubernetes/helm-chart/monitor/README.md b/integration/kubernetes/helm-chart/monitor/README.md new file mode 100644 index 000000000000..64b28d1ab478 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/README.md @@ -0,0 +1,123 @@ +# Introduction + +This chart bootstraps a monitoring system on a Kubernetes cluster using the [Helm](https://helm.sh/docs/using_helm/#installing-helm) package manager. This monitor system can be used to +monitor an [Alluxio](https://www.alluxio.io/) cluster started on Kubernetes cluster. 
+ +## Pre-requisites + +### Kubernetes +Kubernetes 1.11+ with Beta APIs enabled + +## Install the Chart + +To install the Monitor Chart into your Kubernetes cluster: + +``` +$ helm install --namespace "alluxio" "alluxio-monitor" monitor +``` + +After installation succeeds, you can get a status of Chart + +``` +$ helm status "alluxio-monitor" +``` + +## Uninstall the Chart + +If you want to delete your Chart, use this command: + +``` +$ helm delete --purge "alluxio-monitor" +``` + +## Configuration +The monitor system is implemented based on Prometheus + Grafana, the resource files are placed in the `monitor/source` directory. +Before installing the monitor chart, you may make some appropriate modifications to the configuration. +### 1. source/grafana/datasource.yaml +This grafana datasource url domain name is `[MONITORNAME]-prometheus`, for example: our monitor installation name is `alluxio-monitor`, then it will be 'alluxio-monitor-prometheus' +``` +datasources: + - name: Prometheus + ... + url: http://alluxio-monitor-prometheus:9090 +``` +### 2. source/prometheus/prometheus.yaml +Change each prometheus job's namespace, For example, if the alluxio cluster we want to monitor is installed in `alluxio` namespace, then edit the prometheus.yaml: +``` +scrape_configs: + - job_name: 'alluxio master' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - alluxio +``` +### 3. Enable the alluxio metrics +To use the monitor, we need the alluxio prometheus podAnnotations defined in the '../alluxio/values.yaml' metrics part, so it is necessary to enable metrics before installing the alluxio chart. +After that, the monitor can keep track of the target alluxio cluster. +``` +metrics: + enabled: true + ... 
+ PrometheusMetricsServlet: + enabled: true + # Pod annotations for Prometheus + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "19999" + prometheus.io/jobPort: "20002" + prometheus.io/workerPort: "30000" + prometheus.io/path: "/metrics/prometheus/" +``` +### 4. Download the alluxio dashboard +Download the alluxio dashboard from [Alluxio grafana dashboard V1](https://grafana.com/grafana/dashboards/17763-alluxio-prometheus-grafana-monitor-v1/), then +move the dashboard file to `monitor/source/grafana/dashboard` directory. + +## Helm Chart Values + +Full documentation can be found in the comments of the `values.yaml` file, but a high level overview is provided here. + +__Common Values:__ + +| Parameter | Description | Default | +|-------------------------|--------------------------------------------------------|-----------------------------------------| +| `fullnameOverride` | To replace the generated name | `alluxio-monitor` | +| `imagePullPolicy` | Docker image pull policy | `IfNotPresent` | +| `grafanaConfig.name[0]` | Grafana dashboard config name | `grafana-dashboard-config` | +| `grafanaConfig.path[0]` | Grafana dashboard config path in the image container | `/etc/grafana/provisioning/dashboards` | +| `grafanaConfig.name[1]` | Grafana datasource config name | `grafana-datasource-config` | +| `grafanaConfig.path[1]` | Grafana datasource config path in the image container | `/etc/grafana/provisioning/datasources` | +| `prometheusConfig.name` | Prometheus config name | `prometheus-config` | +| `prometheusConfig.path` | Prometheus config path in the image container | `/etc/prometheus` | + +__Prometheus values:__ + +| Parameter | Description | Default | +|-----------------------------|-----------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| +| 
`imageInfo.image` | The prometheus docker image | `prom/prometheus` | +| `imageInfo.tag` | The prometheus image tag | `latest` | +| `port.TCP` | The prometheus default listen address | `9090` | +| `args` | The prometheus config args, see values.yaml for detail explanation | `--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention=72h --web.listen-address=:9090` | +| `hostNetwork` | Controls whether the pod may use the node network namespace | `false` | +| `dnsPolicy` | `dnsPolicy` will be `ClusterFirstWithHostNet` if `hostNetwork: true` and `ClusterFirst` if `hostNetwork: false` | `ClusterFirst` | +| `resources.limits.cpu` | CPU Limit | `4` | +| `resources.limits.memory` | Memory Limit | `4G` | +| `resources.requests.cpu` | CPU Request | `1` | +| `resources.requests.memory` | Memory Request | `1G` | + +__Grafana values:__ + +| Parameter | Description | Default | +|---------------------------------|-----------------------------------------------------------------------------------------------------------------|-------------------| +| `imageInfo.image` | The grafana docker image | `grafana/grafana` | +| `imageInfo.tag` | The grafana image tag | `latest` | +| `env.GF_AUTH_BASIC_ENABLED` | Environment variable of grafana to enable basic authentication | `true` | +| `env.GF_AUTH_ANONYMOUS_ENABLED` | Environment variable of grafana to disable anonymous authentication | `false` | +| `port.web` | The grafana web port | `9090` | +| `port.hostPort` | The hostPort export node port to visit the grafana web | `8081` | +| `hostNetwork` | Controls whether the pod may use the node network namespace | `false` | +| `dnsPolicy` | `dnsPolicy` will be `ClusterFirstWithHostNet` if `hostNetwork: true` and `ClusterFirst` if `hostNetwork: false` | `ClusterFirst` | +| `resources.limits.cpu` | CPU Limit | `2` | +| `resources.limits.memory` | Memory Limit | `2G` | +| `resources.requests.cpu` | CPU Request | `0.5` | +| `resources.requests.memory` | 
Memory Request | `1G` | \ No newline at end of file diff --git a/integration/kubernetes/helm-chart/monitor/source/grafana/dashboard/dashboard.yaml b/integration/kubernetes/helm-chart/monitor/source/grafana/dashboard/dashboard.yaml new file mode 100644 index 000000000000..6c2a4870520a --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/source/grafana/dashboard/dashboard.yaml @@ -0,0 +1,34 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +apiVersion: 1 +providers: + # an unique provider name. Required + - name: Alluxio-Prometheus-Grafana-Monitor-V1 + # Org id. Default to 1 + orgId: 1 + # name of the dashboard folder. + folder: '' + # folder UID. will be automatically generated if not specified + folderUid: '' + # provider type. Default to 'file' + type: file + # disable dashboard deletion + disableDeletion: false + # how often Grafana will scan for changed dashboards + updateIntervalSeconds: 15 + # allow updating provisioned dashboards from the UI + allowUiUpdates: false + options: + # path to dashboard files on disk. 
Required when using the 'file' type + path: /etc/grafana/provisioning/dashboards + # use folder names from filesystem to create folders in Grafana + foldersFromFilesStructure: false diff --git a/integration/kubernetes/helm-chart/monitor/source/grafana/datasource.yaml b/integration/kubernetes/helm-chart/monitor/source/grafana/datasource.yaml new file mode 100644 index 000000000000..847e3799d546 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/source/grafana/datasource.yaml @@ -0,0 +1,67 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required + - name: Prometheus + # datasource type. Required + type: prometheus + # access mode. proxy or direct (Server or Browser in the UI). Required + access: proxy + # org id. 
will default to orgId 1 if not specified + orgId: 1 + # custom UID which can be used to reference this datasource in other parts of the configuration, if not specified will be generated automatically + uid: my_unique_uid + # url + url: http://alluxio-monitor-prometheus:9090 + # Deprecated, use secureJsonData.password + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: + # basic auth username + basicAuthUser: + # Deprecated, use secureJsonData.basicAuthPassword + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in jsonData + jsonData: + graphiteVersion: '1.1' + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: '...' + tlsClientCert: '...' + tlsClientKey: '...' + # database password, if used + password: + # basic auth password + basicAuthPassword: + version: 1 + # allow users to edit datasources from the UI. + editable: false diff --git a/integration/kubernetes/helm-chart/monitor/source/prometheus/prometheus.yaml b/integration/kubernetes/helm-chart/monitor/source/prometheus/prometheus.yaml new file mode 100644 index 000000000000..1eab76a6115b --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/source/prometheus/prometheus.yaml @@ -0,0 +1,131 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. 
+# + +global: + scrape_interval: 15s + evaluation_interval: 15s +scrape_configs: + - job_name: 'alluxio master' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - alluxio + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_role] + action: keep + regex: alluxio-master + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod_name + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + - source_labels: [__meta_kubernetes_pod_label_release] + action: replace + target_label: cluster_name + + - job_name: 'alluxio job master' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - alluxio + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_role] + action: keep + regex: alluxio-master + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_jobPort] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: 
[__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod_name + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + - source_labels: [__meta_kubernetes_pod_label_release] + action: replace + target_label: cluster_name + + - job_name: 'alluxio worker' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - alluxio + tls_config: + insecure_skip_verify: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_role] + action: keep + regex: alluxio-worker + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_workerPort] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod_name + - source_labels: [__meta_kubernetes_pod_node_name] + action: replace + target_label: node + - source_labels: [__meta_kubernetes_pod_label_release] + action: replace + target_label: cluster_name diff --git a/integration/kubernetes/helm-chart/monitor/templates/_helpers.tpl b/integration/kubernetes/helm-chart/monitor/templates/_helpers.tpl new file mode 100644 index 000000000000..9a06139bfdc0 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/_helpers.tpl @@ -0,0 +1,84 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "monitor.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "monitor.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "monitor.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "monitor.prometheus.resources" -}} +resources: + limits: + {{- if .Values.prometheus.resources.limits }} + {{- if .Values.prometheus.resources.limits.cpu }} + cpu: {{ .Values.prometheus.resources.limits.cpu }} + {{- end }} + {{- if .Values.prometheus.resources.limits.memory }} + memory: {{ .Values.prometheus.resources.limits.memory }} + {{- end }} + {{- end }} + requests: + {{- if .Values.prometheus.resources.requests }} + {{- if .Values.prometheus.resources.requests.cpu }} + cpu: {{ .Values.prometheus.resources.requests.cpu }} + {{- end }} + {{- if .Values.prometheus.resources.requests.memory }} + memory: {{ .Values.prometheus.resources.requests.memory }} + {{- end }} + {{- end }} +{{- end -}} + +{{- define "monitor.grafana.readinessProbe" -}} +readinessProbe: + httpGet: + path: /login + port: 3000 + initialDelaySeconds: 30 +{{- end -}} + +{{- define "monitor.grafana.resources" -}} +resources: + limits: + {{- if .Values.grafana.resources.limits }} + {{- if 
.Values.grafana.resources.limits.cpu }} + cpu: {{ .Values.grafana.resources.limits.cpu }} + {{- end }} + {{- if .Values.grafana.resources.limits.memory }} + memory: {{ .Values.grafana.resources.limits.memory }} + {{- end }} + {{- end }} + requests: + {{- if .Values.grafana.resources.requests }} + {{- if .Values.grafana.resources.requests.cpu }} + cpu: {{ .Values.grafana.resources.requests.cpu }} + {{- end }} + {{- if .Values.grafana.resources.requests.memory }} + memory: {{ .Values.grafana.resources.requests.memory }} + {{- end }} + {{- end }} +{{- end -}} diff --git a/integration/kubernetes/helm-chart/monitor/templates/config/grafana-conf.yaml b/integration/kubernetes/helm-chart/monitor/templates/config/grafana-conf.yaml new file mode 100644 index 000000000000..0b03e048cbfb --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/config/grafana-conf.yaml @@ -0,0 +1,37 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{- $name := include "monitor.name" . }} +{{- $fullName := include "monitor.fullname" . }} +{{- $chart := include "monitor.chart" . 
}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $fullName }}-grafana-dashboard-config + labels: + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + {{- (.Files.Glob "source/grafana/dashboard/*").AsConfig | nindent 2 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $fullName }}-grafana-datasource-config + labels: + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + {{- (.Files.Glob "source/grafana/datasource.yaml").AsConfig | nindent 2 }} diff --git a/integration/kubernetes/helm-chart/monitor/templates/config/prometheus-conf.yaml b/integration/kubernetes/helm-chart/monitor/templates/config/prometheus-conf.yaml new file mode 100644 index 000000000000..95c8738c5f78 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/config/prometheus-conf.yaml @@ -0,0 +1,26 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{- $name := include "monitor.name" . }} +{{- $fullName := include "monitor.fullname" . }} +{{- $chart := include "monitor.chart" . 
}} + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $fullName }}-prometheus-config + labels: + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + {{- (.Files.Glob "source/prometheus/*").AsConfig | nindent 2 }} diff --git a/integration/kubernetes/helm-chart/monitor/templates/grafana/deployment.yaml b/integration/kubernetes/helm-chart/monitor/templates/grafana/deployment.yaml new file mode 100644 index 000000000000..3c4cee375143 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/grafana/deployment.yaml @@ -0,0 +1,83 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{- $fullName := include "monitor.fullname" . }} +{{- $hostNetwork := .Values.grafana.hostNetwork }} +{{- $hostPID := .Values.grafana.hostPID }} +{{- $name := include "monitor.name" . }} +{{- $chart := include "monitor.chart" . 
}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $fullName }}-grafana + labels: + name: {{ $fullName }}-grafana + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + replicas: 1 + selector: + matchLabels: + name: {{ $fullName }}-grafana + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + template: + metadata: + labels: + name: {{ $fullName }}-grafana + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + spec: + hostPID: {{ $hostPID }} + hostNetwork: {{ $hostNetwork }} + dnsPolicy: {{ .Values.grafana.dnsPolicy | default ($hostNetwork | ternary "ClusterFirstWithHostNet" "ClusterFirst") }} + containers: + - image: {{ .Values.grafana.imageInfo.image }}:{{ .Values.grafana.imageInfo.imageTag }} + name: {{ $fullName }}-grafana + imagePullPolicy: {{ .Values.imagePullPolicy }} +{{ include "monitor.grafana.resources" . | indent 8 }} + env: + {{- range $key, $value := .Values.grafana.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + ports: + - containerPort: {{ .Values.grafana.port.web }} + name: web + protocol: TCP + hostPort: {{ .Values.grafana.port.hostPort }} +{{ include "monitor.grafana.readinessProbe" . 
| indent 8 }} + volumeMounts: + - name: grafana-storage + mountPath: /opt + {{- if .Values.grafanaConfig }} + {{- range .Values.grafanaConfig }} + - name: {{ $fullName }}-{{ .name }} + mountPath: {{ .path }} + {{- end }} + {{- end }} + volumes: + - name: grafana-storage + emptyDir: {} + {{- if .Values.grafanaConfig }} + {{- range .Values.grafanaConfig }} + - name: {{ $fullName }}-{{ .name }} + configMap: + name: {{ $fullName }}-{{ .name }} + {{- end }} + {{- end }} diff --git a/integration/kubernetes/helm-chart/monitor/templates/prometheus/deployment.yaml b/integration/kubernetes/helm-chart/monitor/templates/prometheus/deployment.yaml new file mode 100644 index 000000000000..74b39ab7497c --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/prometheus/deployment.yaml @@ -0,0 +1,84 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{ if .Values.prometheus.enabled -}} +{{- $hostNetwork := .Values.prometheus.hostNetwork }} +{{- $fullName := include "monitor.fullname" . }} +{{- $hostPID := .Values.prometheus.hostPID }} +{{- $name := include "monitor.name" . }} +{{- $chart := include "monitor.chart" . 
}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + name: {{ $fullName }}-prometheus +spec: + replicas: 1 + selector: + matchLabels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + template: + metadata: + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + spec: + hostPID: {{ $hostPID }} + hostNetwork: {{ $hostNetwork }} + dnsPolicy: {{ .Values.prometheus.dnsPolicy | default ($hostNetwork | ternary "ClusterFirstWithHostNet" "ClusterFirst") }} + containers: + - image: {{ .Values.prometheus.imageInfo.image }}:{{ .Values.prometheus.imageInfo.imageTag }} + imagePullPolicy: {{ .Values.imagePullPolicy }} + name: {{ $fullName }}-prometheus + command: + - "/bin/prometheus" + args: +{{ toYaml .Values.prometheus.args | trim | indent 12 }} + ports: + - containerPort: {{ .Values.prometheus.port.TCP }} + protocol: TCP + volumeMounts: + - mountPath: "/prometheus" + name: data + {{- if .Values.prometheusConfig }} + {{- range .Values.prometheusConfig }} + - name: {{ $fullName }}-{{ .name }} + mountPath: "{{ .path }}" + {{- end }} + {{- end }} + {{- if .Values.prometheus.resources }} +{{ include "monitor.prometheus.resources" . 
| indent 10 }} + {{- end }} + serviceAccountName: {{ $fullName }}-prometheus + volumes: + - name: data + emptyDir: {} + {{- if .Values.prometheusConfig }} + {{- range .Values.prometheusConfig }} + - name: {{ $fullName }}-{{ .name }} + configMap: + name: {{ $fullName }}-{{ .name }} + {{- end }} + {{- end }} +{{- end }} diff --git a/integration/kubernetes/helm-chart/monitor/templates/prometheus/rbac.yaml b/integration/kubernetes/helm-chart/monitor/templates/prometheus/rbac.yaml new file mode 100644 index 000000000000..05ef92bcab63 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/prometheus/rbac.yaml @@ -0,0 +1,60 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{ if .Values.prometheus.enabled -}} +{{- $name := include "monitor.name" . }} +{{- $fullName := include "monitor.fullname" . }} +{{- $chart := include "monitor.chart" . 
}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $fullName }}-prometheus + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: + - apiGroups: [""] + resources: + - pods + verbs: ["get", "list", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ $fullName }}-prometheus + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $fullName }}-prometheus + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ $fullName }}-prometheus +subjects: + - kind: ServiceAccount + name: {{ $fullName }}-prometheus +{{- end }} diff --git a/integration/kubernetes/helm-chart/monitor/templates/prometheus/service.yaml b/integration/kubernetes/helm-chart/monitor/templates/prometheus/service.yaml new file mode 100644 index 000000000000..9d7ff0ffba90 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/templates/prometheus/service.yaml @@ -0,0 +1,34 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +{{ if .Values.prometheus.enabled -}} +{{- $name := include "monitor.name" . }} +{{- $fullName := include "monitor.fullname" . 
}} +{{- $chart := include "monitor.chart" . }} +kind: Service +apiVersion: v1 +metadata: + labels: + name: {{ $fullName }}-prometheus + app: {{ $name }} + chart: {{ $chart }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + name: {{ $fullName }}-prometheus +spec: + ports: + - port: {{int .Values.prometheus.port.TCP}} + name: web + clusterIP: None + selector: + app: {{ $name }} + name: {{ $fullName }}-prometheus +{{- end }} diff --git a/integration/kubernetes/helm-chart/monitor/values.yaml b/integration/kubernetes/helm-chart/monitor/values.yaml new file mode 100644 index 000000000000..86142b62a765 --- /dev/null +++ b/integration/kubernetes/helm-chart/monitor/values.yaml @@ -0,0 +1,82 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. +# + +# The fullnameOverride should not be modified in the usual case. +fullnameOverride: alluxio-monitor +imagePullPolicy: IfNotPresent + +# The grafana plugin path config, include datasource path and dashboards path. +grafanaConfig: +- name: grafana-dashboard-config + path: /etc/grafana/provisioning/dashboards +- name: grafana-datasource-config + path: /etc/grafana/provisioning/datasources + +# The prometheus.yaml file path. 
+prometheusConfig: + - name: prometheus-config + path: /etc/prometheus + +## prometheus ## + +prometheus: + enabled: true + imageInfo: + image: prom/prometheus + imageTag: latest + port: + TCP: 9090 + args: + - "--config.file=/etc/prometheus/prometheus.yaml" # the prometheus config file path + - "--storage.tsdb.path=/prometheus" # Where prometheus writes its database. + - "--storage.tsdb.retention=72h" # When to remove old data + - "--web.listen-address=:9090" # Listen address + hostPID: false + hostNetwork: false + # dnsPolicy will be ClusterFirstWithHostNet if hostNetwork: true + # and ClusterFirst if hostNetwork: false + # You can specify dnsPolicy here to override this inference + # dnsPolicy: ClusterFirst + resources: + limits: + cpu: "4" + memory: "4G" + requests: + cpu: "1" + memory: "1G" + +## grafana ## + +grafana: + env: + GF_AUTH_BASIC_ENABLED: "true" # Enable authentication + GF_AUTH_ANONYMOUS_ENABLED: "false" + imageInfo: + image: grafana/grafana + imageTag: latest + # Use nodeIp:hostPort visit the grafana web + port: + web: 3000 + hostPort: 8080 + hostPID: false + hostNetwork: false + # dnsPolicy will be ClusterFirstWithHostNet if hostNetwork: true + # and ClusterFirst if hostNetwork: false + # You can specify dnsPolicy here to override this inference + # dnsPolicy: ClusterFirst + resources: + limits: + cpu: "2" + memory: "2G" + requests: + cpu: "0.5" + memory: "1G" + From 2a23f07d882b365add824323bd2d0ef724d90d6b Mon Sep 17 00:00:00 2001 From: LuQQiu Date: Mon, 9 Jan 2023 09:43:51 -0800 Subject: [PATCH 046/334] Add stale labels for PRs/Issues automatically Automatically add stale labels to PRs/Issues. 
A pre step for close the PRs/issues https://github.com/rook/rook/blob/master/.github/workflows/stale.yaml the workload flow: -> Issue/PRs -> no activity for one month -> label as stale and warning for close if does not have activities for another week -> Manually check the stale PRs/issues and decide whether to close them Example https://github.com/rook/rook/issues/10993 pr-link: Alluxio/alluxio#16712 change-id: cid-f4ac6e8b3505ed385d9c09921ba49689e0011ff1 --- .github/workflows/stale.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/stale.yaml diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 000000000000..be5d340ec5ae --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,34 @@ +name: "Mark stale issues and PRs" +on: + schedule: + # Run the stalebot every day at 8pm UTC + - cron: "00 20 * * *" + +permissions: + contents: read + +jobs: + stale: + permissions: + issues: write # for writing stale message + pull-requests: write # for writing stale message + runs-on: ubuntu-20.04 + if: github.repository == 'alluxio/alluxio' + steps: + - uses: actions/stale@v6 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-stale: 30 + days-before-close: -1 + stale-issue-message: > + This issue has been automatically marked as stale because it has not had recent activity. + It will be closed in two weeks if no further activity occurs. + Thank you for your contributions. + stale-pr-message: > + This pull request has been automatically marked as stale because it has not had + recent activity. It will be closed in two weeks if no further activity occurs. + Thank you for your contributions. 
+ stale-pr-label: "stale" + stale-issue-label: "stale" + exempt-issue-labels: "keepalive,priority-high" + exempt-pr-labels: "keepalive,priority-high" From 7a4e0a53a3ea89630c0cd9bca77bb7fdea254873 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Mon, 9 Jan 2023 09:50:17 -0800 Subject: [PATCH 047/334] Fix hanging page store test In the async page store, when worker threads run out, the caller thread should perform the action. The test had a race condition where all the threads could block, including the calling thread. pr-link: Alluxio/alluxio#16731 change-id: cid-81d809c343968d68b57988978575d8b9e4ace63a --- .../client/file/cache/LocalCacheManager.java | 3 +- .../client/file/cache/HangingPageStore.java | 20 +++++++++++ .../file/cache/LocalCacheManagerTest.java | 35 ++++++++++++++++--- 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java index a1dad88b169b..f49541fbf45a 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java @@ -150,7 +150,8 @@ public static LocalCacheManager create(CacheManagerOptions options, * @param pageId page identifier * @return the page lock id */ - private int getPageLockId(PageId pageId) { + @VisibleForTesting + public int getPageLockId(PageId pageId) { return Math.floorMod((int) (pageId.getFileId().hashCode() + pageId.getPageIndex()), LOCK_SIZE); } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java b/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java index b8cf2f239544..fde5fafbc3c2 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/HangingPageStore.java @@ -20,6 +20,7 @@ import 
java.nio.ByteBuffer; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; /** * A PageStore can hang on put, get or delete. @@ -29,6 +30,7 @@ class HangingPageStore extends LocalPageStore { private AtomicBoolean mGetHanging = new AtomicBoolean(false); private AtomicBoolean mPutHanging = new AtomicBoolean(false); private AtomicInteger mPut = new AtomicInteger(0); + private AtomicLong mStopHangingThread = new AtomicLong(-1); public HangingPageStore(PageStoreOptions options) { super(options); @@ -45,6 +47,7 @@ public void delete(PageId pageId) throws IOException, PageNotFoundException { public int get(PageId pageId, int pageOffset, int bytesToRead, PageReadTargetBuffer target, boolean isTemporary) throws IOException, PageNotFoundException { + checkStopHanging(); // never quit while (mGetHanging.get()) {} return super.get(pageId, pageOffset, bytesToRead, target, isTemporary); @@ -52,12 +55,20 @@ public int get(PageId pageId, int pageOffset, int bytesToRead, PageReadTargetBuf @Override public void put(PageId pageId, ByteBuffer page, boolean isTemporary) throws IOException { + checkStopHanging(); // never quit while (mPutHanging.get()) {} super.put(pageId, page, isTemporary); mPut.getAndIncrement(); } + private void checkStopHanging() { + if (mStopHangingThread.get() == Thread.currentThread().getId()) { + mPutHanging.set(false); + mGetHanging.set(false); + } + } + /** * @param value if delete operation hangs */ @@ -79,6 +90,15 @@ public void setPutHanging(boolean value) { mPutHanging.set(value); } + /** + * Set a thread id so that if a thread with the given id reaches + * the line where it should hang, it will disable hanging. 
+ * @param id the thread id to stop the hanging + */ + public void setStopHangingThread(long id) { + mStopHangingThread.set(id); + } + /** * @return number of put operations */ diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java index 4b1a37df9f2e..f564d4035ed2 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheManagerTest.java @@ -46,6 +46,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.junit.After; import org.junit.Assume; import org.junit.Before; import org.junit.Rule; @@ -56,8 +57,10 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Paths; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -111,6 +114,11 @@ public void before() throws Exception { mCacheManager = createLocalCacheManager(); } + @After + public void after() throws Exception { + mCacheManager.close(); + } + private byte[] page(int i, int pageLen) { return BufferUtils.getIncreasingByteArray(i, pageLen); } @@ -800,6 +808,7 @@ public void asyncRestoreWithMorePagesThanCapacity() throws Exception { @Test public void asyncCache() throws Exception { + // this must be smaller than the number of locks in the page store for the test to succeed final int threads = 16; mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_WRITE_ENABLED, true); mConf.set(PropertyKey.USER_CLIENT_CACHE_ASYNC_WRITE_THREADS, threads); @@ -813,14 +822,30 @@ public void asyncCache() throws Exception { pageStore.setPutHanging(true); mPageMetaStore = new DefaultPageMetaStore(ImmutableList.of(dir)); mCacheManager = 
createLocalCacheManager(mConf, mPageMetaStore); + Set lockedPages = new HashSet<>(); for (int i = 0; i < threads; i++) { PageId pageId = new PageId("5", i); assertTrue(mCacheManager.put(pageId, page(i, PAGE_SIZE_BYTES))); + lockedPages.add(mCacheManager.getPageLockId(pageId)); } - pageStore.setPutHanging(false); - //fallback to caller's thread when queue is full - assertTrue(mCacheManager.put(PAGE_ID1, PAGE1)); - while (pageStore.getPuts() < threads) { + // by setting the following line the hanging will only be stopped when the current + // thread adds a page + pageStore.setStopHangingThread(Thread.currentThread().getId()); + // fallback to caller's thread (the current here) when queue is full + // find a page id that is not already locked + int pageLockId; + long nxtIdx = 0; + PageId callerPageId; + do { + callerPageId = new PageId("0L", nxtIdx); + pageLockId = mCacheManager.getPageLockId(callerPageId); + nxtIdx++; + } while (lockedPages.contains(pageLockId)); + // this page will be inserted by the current thread and not a worker thread + assertTrue(mCacheManager.put(callerPageId, PAGE1)); + // Wait for all tasks to complete + // one for each thread worker thread, and one on the main thread + while (pageStore.getPuts() < threads + 1) { Thread.sleep(1000); } pageStore.setPutHanging(true); @@ -828,6 +853,7 @@ public void asyncCache() throws Exception { PageId pageId = new PageId("6", i); assertTrue(mCacheManager.put(pageId, page(i, PAGE_SIZE_BYTES))); } + pageStore.setPutHanging(false); } @Test @@ -851,6 +877,7 @@ public void asyncCacheSamePage() throws Exception { } pageStore.setPutHanging(true); assertTrue(mCacheManager.put(PAGE_ID1, PAGE1)); + pageStore.setPutHanging(false); } @Test From 0a6fe08d9b496d476acfcc9d3952f2cf78de4301 Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Tue, 10 Jan 2023 04:31:56 +0800 Subject: [PATCH 048/334] [DOCFIX] Update cn version of Job Service doc ### What changes are proposed in this pull 
request? Update cn version of Job Service doc. ### Why are the changes needed? The Chinese overview/Job Service document is not updated with the latest changes, this PR synchronizes these updates. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. pr-link: Alluxio/alluxio#16686 change-id: cid-8a7a47c86f15608035131ba410fabd16145a3264 --- docs/cn/overview/JobService.md | 106 +++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 docs/cn/overview/JobService.md diff --git a/docs/cn/overview/JobService.md b/docs/cn/overview/JobService.md new file mode 100644 index 000000000000..49c3df86b551 --- /dev/null +++ b/docs/cn/overview/JobService.md @@ -0,0 +1,106 @@ +--- +layout: global +title: 作业服务器 +group: Overview +priority: 3 +--- + +* 内容列表 +{:toc} + +## 架构概览 + +Alluxio 作业服务器是负责将各种不同类型的操作分配给Job Worker的任务调度框架。 + +Master负责将作业分配为更小的任务,供Job Worker执行并管理作业的完成状态。 + +Job Worker将来自Job Master的任务排列(queue),并通过管理可配置的固定线程池(`alluxio.job.worker.threadpool.size`)来完成这些任务。 + +## 不同类型的作业 + +### 加载 Load + +`fs distributedLoad`CLI命令中使用了加载作业,按特定的副本数将文件加载到Alluxio。 + +### 迁移 Migrate + +`fs distributedCp`和`fs distributedMv`CLI命令中使用了迁移作业,使用固定的[写入类型]({{ '/en/overview/Architecture.html#data-flow-write' | relativize_url }})进行数据复制/移动。 + +### 持久化 Persist + +`fs persist` CLI命令间接使用了持久化作业,以`ASYNC_THROUGH`[写入类型]({{ '/en/overview/Architecture.html#data-flow-write' | relativize_url }})写入Alluxio时使用持久化作业在后台进行持久化。 + +该作业负责将Alluxio中的文件持久化到特定的ufs路径中。 + +### 驱逐 Evict + +`fs free` CLI命令和后台复制进程间接使用了驱逐作业。 + +该作业负责从Alluxio中驱逐出特定数量的数据块副本。 + +### 移动 Move + +复制后台进程使用移动作业将数据块从一个worker移动到另一个worker。 + +### 复制 Replicate + +后台复制进程使用复制作业将数据块从一个worker复制到特定数量的其他worker上。 + +## 巡检命令 + +作业服务器提供以下一系列的巡检命令。 + +### fsadmin report jobservice + +`fsadmin report jobservice` 会报告作业服务器摘要。 + +```console +$ ./bin/alluxio fsadmin report jobservice +Worker: MigrationTest-workers-2 Task Pool Size: 10 Unfinished Tasks: 1303 Active Tasks: 10 Load Avg: 1.08, 0.64, 
0.27 +Worker: MigrationTest-workers-3 Task Pool Size: 10 Unfinished Tasks: 1766 Active Tasks: 10 Load Avg: 1.02, 0.48, 0.21 +Worker: MigrationTest-workers-1 Task Pool Size: 10 Unfinished Tasks: 1808 Active Tasks: 10 Load Avg: 0.73, 0.5, 0.23 + +Status: CREATED Count: 4877 +Status: CANCELED Count: 0 +Status: FAILED Count: 1 +Status: RUNNING Count: 0 +Status: COMPLETED Count: 8124 + +10 Most Recently Modified Jobs: +Timestamp: 10-28-2020 22:02:34:001 Id: 1603922371976 Name: Persist Status: COMPLETED +Timestamp: 10-28-2020 22:02:34:001 Id: 1603922371982 Name: Persist Status: COMPLETED +(only a subset of the results is shown) + +10 Most Recently Failed Jobs: +Timestamp: 10-24-2019 17:15:22:946 Id: 1603922372008 Name: Persist Status: FAILED + +10 Longest Running Jobs: +``` + +### job ls + +`job ls` 会列出正在作业服务器上运行或运行过的作业。 + +```console +$ ./bin/alluxio job ls +1613673433925 Persist COMPLETED +1613673433926 Persist COMPLETED +1613673433927 Persist COMPLETED +1613673433928 Persist COMPLETED +1613673433929 Persist COMPLETED +``` + +### job stat -v + +`job stat -v ` 会列出某个作业的详细信息。(加 `-v` 表示包含worker上指定任务的信息) + +```console +bin/alluxio job stat -v 1613673433929 +ID: 1613673433929 +Name: Persist +Description: PersistConfig{filePath=/test5/lib/alluxio-underfs-cosn-2.5.0-SNAPSHOT.jar, mountId=1, overwrite=false, ufsPath=... +Status: COMPLETED +Task 0 + Worker: 192.168.42.71 + Status: COMPLETED +``` \ No newline at end of file From f926797ff6b073c2a928251bbb88d11ca8e4b11f Mon Sep 17 00:00:00 2001 From: kimsu98 <40134677+kimsu98@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:37:09 -0800 Subject: [PATCH 049/334] [DOCFIX] Add missing newline to README file ### What changes are proposed in this pull request? Trivial housekeeping change to add missing newline to README file Please outline the changes and how this PR fixes the issue. Add missing newline to end of README file ### Why are the changes needed? Housekeeping Please clarify why the changes are needed. For instance, 1. 
If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? No Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16737 change-id: cid-b72eea2affa0e801a293312bc59fffa10e7534ce --- integration/kubernetes/helm-chart/monitor/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration/kubernetes/helm-chart/monitor/README.md b/integration/kubernetes/helm-chart/monitor/README.md index 64b28d1ab478..c973117a8399 100644 --- a/integration/kubernetes/helm-chart/monitor/README.md +++ b/integration/kubernetes/helm-chart/monitor/README.md @@ -120,4 +120,5 @@ __Grafana values:__ | `resources.limits.cpu` | CPU Limit | `2` | | `resources.limits.memory` | Memory Limit | `2G` | | `resources.requests.cpu` | CPU Request | `0.5` | -| `resources.requests.memory` | Memory Request | `1G` | \ No newline at end of file +| `resources.requests.memory` | Memory Request | `1G` | + From d4362849d53d67ba9724d8d1f24fc0969c4ff03b Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Mon, 9 Jan 2023 16:33:20 -0800 Subject: [PATCH 050/334] Remove incorrect copy in StackFS All properties have already copied from `args` to set `fuseOpts` in line 45-47. Arraycopy would error out. 
pr-link: Alluxio/alluxio#16738 change-id: cid-25c62163e9b6e807e7ea31b04236ea891f0db444 --- integration/fuse/src/main/java/alluxio/fuse/StackMain.java | 1 - 1 file changed, 1 deletion(-) diff --git a/integration/fuse/src/main/java/alluxio/fuse/StackMain.java b/integration/fuse/src/main/java/alluxio/fuse/StackMain.java index d75eddfdf92c..575bb8357bad 100644 --- a/integration/fuse/src/main/java/alluxio/fuse/StackMain.java +++ b/integration/fuse/src/main/java/alluxio/fuse/StackMain.java @@ -45,7 +45,6 @@ public static void main(String[] args) { for (int i = 2; i < args.length; i++) { fuseOpts.add(args[i].substring(2)); // remove -o } - System.arraycopy(args, 2, fuseOpts, 0, args.length - 2); try { CommonUtils.PROCESS_TYPE.set(CommonUtils.ProcessType.CLIENT); MetricsSystem.startSinks(conf.getString(PropertyKey.METRICS_CONF_FILE)); From fb7d01e3796544676688a9b50907027aced28eed Mon Sep 17 00:00:00 2001 From: LuQQiu Date: Tue, 10 Jan 2023 14:32:45 -0800 Subject: [PATCH 051/334] Fix stale github action change to run at 7AM US west time, 11PM China time, 3PM UTC run more operations per run (from 30 to 100) From old issues/PRs to new ones pr-link: Alluxio/alluxio#16744 change-id: cid-caa6b238f0eb1aa74024bb655c5d73e158af3603 --- .github/workflows/stale.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index be5d340ec5ae..efc2237486b7 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -1,8 +1,8 @@ name: "Mark stale issues and PRs" on: schedule: - # Run the stalebot every day at 8pm UTC - - cron: "00 20 * * *" + # Run the stalebot every day at 3pm UTC + - cron: "00 15 * * *" permissions: contents: read @@ -18,6 +18,8 @@ jobs: - uses: actions/stale@v6 with: repo-token: ${{ secrets.GITHUB_TOKEN }} + ascending: true # old issues/PRs first + operations-per-run: 100 # default is 30, enlarge for dealing with more issues/PRs days-before-stale: 30 days-before-close: -1 
stale-issue-message: > From be64b01bc32de8edadaf861bc32c3e9924ecb1e5 Mon Sep 17 00:00:00 2001 From: kimsu98 <40134677+kimsu98@users.noreply.github.com> Date: Tue, 10 Jan 2023 14:32:52 -0800 Subject: [PATCH 052/334] [DOCFIX] Add newline to end of doc file ### What changes are proposed in this pull request? Trivial housekeeping change to add missing newline to doc file Please outline the changes and how this PR fixes the issue. Add missing newline to end of doc file ### Why are the changes needed? Housekeeping Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? No Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16745 change-id: cid-21791b63f863c72a79cacefadc70115791e91f84 --- docs/cn/overview/JobService.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cn/overview/JobService.md b/docs/cn/overview/JobService.md index 49c3df86b551..ffa5f3d732ee 100644 --- a/docs/cn/overview/JobService.md +++ b/docs/cn/overview/JobService.md @@ -103,4 +103,5 @@ Status: COMPLETED Task 0 Worker: 192.168.42.71 Status: COMPLETED -``` \ No newline at end of file +``` + From de4f1b2ce71575927c39eb8db2aaa2cb8e788190 Mon Sep 17 00:00:00 2001 From: Shuaibing Zhao Date: Wed, 11 Jan 2023 06:38:15 +0800 Subject: [PATCH 053/334] Fix bug for ufs journal dumper when read regular checkpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes are proposed in this pull request? Fix `UfsJournalDumper.dumpJournal()`, which shouldn't call `Files.createDirectories(dir)`. ### Why are the changes needed? 
bug: when using UfsJournal, if you use JournalTool to dump the ufs journal of a regular checkpoint(like BlockMaster), it will throws FileNotFoundException: /path-to-output/checkpoints-106 (Is a directory). like this: 截屏2022-11-15 11 15 58 Because in `AbstractJournalDumper.readCheckpoint(checkpoint, path)`, path is a file or dir. For regular checkpoint, it is a file; and for compound checkpoint, it is a dir. But `UfsJournalDumper` calls `Files.createDirectories(dir)`, which should be called in `AbstractJournalDumper.readCompoundCheckpoint`. ### Does this PR introduce any user facing changes? None pr-link: Alluxio/alluxio#16550 change-id: cid-19f14a9065879b141a5e0301592e1d5827d96a3d --- .../master/journal/tool/UfsJournalDumper.java | 6 ++-- .../alluxio/client/cli/JournalToolTest.java | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java index 59f3f69952a7..b0d4f754612b 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/UfsJournalDumper.java @@ -28,7 +28,6 @@ import java.io.PrintStream; import java.net.URI; import java.net.URISyntaxException; -import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -67,9 +66,8 @@ public void dumpJournal() throws Throwable { switch (state) { case CHECKPOINT: try (CheckpointInputStream checkpoint = reader.getCheckpoint()) { - Path dir = Paths.get(mCheckpointsDir + "-" + reader.getNextSequenceNumber()); - Files.createDirectories(dir); - readCheckpoint(checkpoint, dir); + Path path = Paths.get(mCheckpointsDir + "-" + reader.getNextSequenceNumber()); + readCheckpoint(checkpoint, path); } break; case LOG: diff --git a/tests/src/test/java/alluxio/client/cli/JournalToolTest.java 
b/tests/src/test/java/alluxio/client/cli/JournalToolTest.java index b6b4bbb5b61e..c45579013299 100644 --- a/tests/src/test/java/alluxio/client/cli/JournalToolTest.java +++ b/tests/src/test/java/alluxio/client/cli/JournalToolTest.java @@ -14,6 +14,8 @@ import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.containsString; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -23,6 +25,7 @@ import alluxio.Constants; import alluxio.SystemOutRule; import alluxio.client.WriteType; +import alluxio.client.file.FileOutStream; import alluxio.client.file.FileSystem; import alluxio.client.meta.RetryHandlingMetaMasterClient; import alluxio.conf.Configuration; @@ -141,6 +144,34 @@ public void dumpHeapCheckpointFromUfsJournal() throws Throwable { } } + @Test + public void dumpBlockMasterCheckpointFromUfsJournal() throws Throwable { + blockMasterCheckpointUfsJournal(); + JournalTool.main(new String[] {"-outputDir", mDumpDir.getAbsolutePath(), + "-master", Constants.BLOCK_MASTER_NAME}); + + assertNonemptyFileWithPrefixExist(mDumpDir, "checkpoints"); + } + + private void assertNonemptyFileWithPrefixExist(File parent, String prefix) { + File[] files = parent.listFiles(); + assertNotNull(files); + List checkpointFiles = Arrays.stream(files) + .filter(File::isFile) + .filter(file -> file.getName().startsWith(prefix)).collect(toList()); + assertFalse(checkpointFiles.isEmpty()); + } + + private void blockMasterCheckpointUfsJournal() throws Exception { + // Perform operations to generate a checkpoint. 
+ for (int i = 0; i < CHECKPOINT_SIZE * 2; i++) { + FileOutStream out = mFs.createFile(new AlluxioURI("/" + i)); + out.write(new byte[1]); + out.close(); + } + IntegrationTestUtils.waitForUfsJournalCheckpoint(Constants.BLOCK_MASTER_NAME); + } + @Test @LocalAlluxioClusterResource.Config(confParams = {PropertyKey.Name.MASTER_JOURNAL_TYPE, "EMBEDDED", PropertyKey.Name.MASTER_METASTORE, "HEAP"}) From 9b66b0392fb58ddc899a6d9083fa7bccbd565206 Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 11 Jan 2023 10:35:41 +0800 Subject: [PATCH 054/334] [DOCFIX] Update cn version of Metric doc What changes are proposed in this pull request? Update cn version of Metric doc. Why are the changes needed? There is no corresponding Chinese documentation for Metrics. Does this PR introduce any user facing changes? More Chinese users can access Alluxio documentation more easily. pr-link: Alluxio/alluxio#16701 change-id: cid-2655c7c0e89c01bdab36c7996fa8b9102737af56 --- docs/_data/table/cn/client-metrics.yml | 120 ++++++++ docs/_data/table/cn/cluster-metrics.yml | 66 +++++ docs/_data/table/cn/fuse-metrics.yml | 6 + docs/_data/table/cn/master-metrics.yml | 362 ++++++++++++++++++++++++ docs/_data/table/cn/process-metrics.yml | 2 + docs/_data/table/cn/server-metrics.yml | 6 + docs/_data/table/cn/worker-metrics.yml | 130 +++++++++ docs/cn/reference/Metrics-List.md | 268 ++++++++++++++++++ 8 files changed, 960 insertions(+) create mode 100644 docs/_data/table/cn/client-metrics.yml create mode 100644 docs/_data/table/cn/cluster-metrics.yml create mode 100644 docs/_data/table/cn/fuse-metrics.yml create mode 100644 docs/_data/table/cn/master-metrics.yml create mode 100644 docs/_data/table/cn/process-metrics.yml create mode 100644 docs/_data/table/cn/server-metrics.yml create mode 100644 docs/_data/table/cn/worker-metrics.yml create mode 100644 docs/cn/reference/Metrics-List.md diff --git a/docs/_data/table/cn/client-metrics.yml b/docs/_data/table/cn/client-metrics.yml new file mode 100644 index 
000000000000..2b8b59f6790d --- /dev/null +++ b/docs/_data/table/cn/client-metrics.yml @@ -0,0 +1,120 @@ +Client.BlockMasterClientCount: + 'BlockMasterClientPool 中实例数量' +Client.BlockReadChunkRemote: + '这个客户端从远程 Alluxio worker 读取数据chunk数量。当 alluxio.user.block.read.metrics.enabled 设置为 true 时,才会记录此指标' +Client.BlockWorkerClientCount: + 'BlockWorkerClientPool 中实例数量' +Client.BusyExceptionCount: + '观察到的 BusyException 数量' +Client.BytesReadLocal: + '这个客户端短路读取的总字节数' +Client.BytesReadLocalThroughput: + '这个客户端短路读取的字节吞吐量' +Client.BytesWrittenLocal: + '客户端短路写入 Alluxio 缓存的字节总数' +Client.BytesWrittenLocalThroughput: + '客户端短路写入 Alluxio 缓存的字节吞吐量' +Client.BytesWrittenUfs: + '这个客户端写入 UFS 的字节数' +Client.CacheBytesDiscarded: + '客户端缓存丢弃的总字节数' +Client.CacheBytesEvicted: + '客户端缓存驱逐的总字节数' +Client.CacheBytesReadCache: + '从客户端缓存读的总字节数' +Client.CacheBytesReadExternal: + '由于客户端缓存未命中从 Alluxio 集群读取的总字节数。chunk read 可能导致这个数字小于 Client.CacheBytesReadExternal' +Client.CacheBytesReadInStreamBuffer: + '从客户端缓存的输入流缓冲区中读取的总字节数' +Client.CacheBytesRequestedExternal: + '引起缓存未命中的用户读请求总字节数。这个数字可能会比 Client.CacheBytesReadExternal 小,因为它可能被分成多个块读取' +Client.CacheBytesWrittenCache: + '向客户端缓存写入的总字节数' +Client.CacheCleanErrors: + '为了初始化新缓存时清理已存在缓存路径的失败总数' +Client.CacheCleanupGetErrors: + '清理失败内存读取失败总数' +Client.CacheCleanupPutErrors: + '清理失败内存写入失败总数' +Client.CacheCreateErrors: + '在客户端缓存中创建缓存的失败总数' +Client.CacheDeleteErrors: + '在客户端缓存中删除缓存数据的失败总数' +Client.CacheDeleteFromStoreErrors: + '删除页的失败总数' +Client.CacheDeleteNonExistingPageErrors: + '由于页缺失导致删除页失败的总数' +Client.CacheDeleteNotReadyErrors: + '由于缓存未就绪删除页失败的总数' +Client.CacheGetErrors: + '从客户端缓存中获取缓存数据失败总数' +Client.CacheGetNotReadyErrors: + '由于缓存未就绪获取页失败的总数' +Client.CacheGetStoreReadErrors: + '由于从页存储读取失败导致客户端缓存中获取缓存数据失败的次数' +Client.CacheHitRate: + '缓存命中率:(# 从缓存读取的字节数)/(# 请求的字节数)' +Client.CachePageReadCacheTimeNanos: + '客户端缓存命中时读取页面时间(ns)' +Client.CachePageReadExternalTimeNanos: + '当缓存未命中时,从外部源读取数据所花费时间(ns)' +Client.CachePages: + '客户端缓存中的总页数' +Client.CachePagesDiscarded: + 
'恢复页存储时丢失页的总数' +Client.CachePagesEvicted: + '从客户端缓存中驱逐页的总数' +Client.CachePutAsyncRejectionErrors: + '客户端缓存中放置缓存数据时,由于异步写队列注入失败而导致的失败次数' +Client.CachePutBenignRacingErrors: + '由于驱逐竞争而导致的缓存页添加失败的次数。这个错误是良性的' +Client.CachePutErrors: + '向客户端缓存中放置缓存数据的失败次数' +Client.CachePutEvictionErrors: + '由于驱逐失败而导致的缓存页添加失败的次数。这个错误是良性的' +Client.CachePutInsufficientSpaceErrors: + '由于在驱逐后空间不足导致的将缓存数据放入客户端缓存时的失败次数' +Client.CachePutNotReadyErrors: + '由于缓存不能准备好添加页,添加页失败的次数' +Client.CachePutStoreDeleteErrors: + '在页存储中删除失败导致的缓存数据放置失败的次数' +Client.CachePutStoreWriteErrors: + '由于向页面存储写入失败而导致的将缓存数据放入客户端缓存中失败的次数' +Client.CachePutStoreWriteNoSpaceErrors: + '未达到缓存容量上限但磁盘已满时将缓存数据放入客户端缓存时失败的次数。如果低估写入数据的存储开销比例,这种情况就可能会发生' +Client.CacheShadowCacheBytes: + '客户端 shadow cache 的字节数' +Client.CacheShadowCacheBytesHit: + '客户端 shadow cache 命中的字节数' +Client.CacheShadowCacheBytesRead: + '从客户端 shadow cache 读取的字节数' +Client.CacheShadowCacheFalsePositiveRatio: + '正在使用的工作集布隆过滤器犯错的概率。该值为 0-100。如果太高,则需要分配更多空间' +Client.CacheShadowCachePages: + '客户端 shadow cache 中页的数量' +Client.CacheShadowCachePagesHit: + '客户端 shadow cache 中页的命中次数' +Client.CacheShadowCachePagesRead: + '从客户端 shadow cache 中读取页的数量' +Client.CacheSpaceAvailable: + '客户端缓存中可用字节数' +Client.CacheSpaceUsed: + '客户端缓存使用字节数' +Client.CacheSpaceUsedCount: + '客户端缓存用作计数器的字节数量' +Client.CacheState: + '缓存状态:0(不在使用中),1(只读),2(读写)' +Client.CacheStoreDeleteTimeout: + '从页存储中删除页超时次数' +Client.CacheStoreGetTimeout: + '从页存储中读取页超时次数' +Client.CacheStorePutTimeout: + '向页存储中写入新页超时次数' +Client.CacheStoreThreadsRejected: + '向线程池提交任务时拒绝 I/O 线程的次数,可能是由于本地文件系统无响应。' +Client.DefaultHiveClientCount: + 'DefaultHiveClientPool 中实例数量' +Client.FileSystemMasterClientCount: + 'FileSystemMasterClientPool 中实例数量' +Client.MetadataCacheSize: + '客户端被缓存的文件和目录的元数据总数。只在文件系统为 alluxio.client.file.MetadataCachingBaseFileSystem 时有效' diff --git a/docs/_data/table/cn/cluster-metrics.yml b/docs/_data/table/cn/cluster-metrics.yml new file mode 100644 index 000000000000..b097b0725abc --- /dev/null +++ 
b/docs/_data/table/cn/cluster-metrics.yml @@ -0,0 +1,66 @@ +Cluster.ActiveRpcReadCount: + ' worker 上进行中的 read-RPC 数量' +Cluster.ActiveRpcWriteCount: + 'worker 上进行中的 write-RPC 数量' +Cluster.BytesReadDirect: + '汇总在所有 worker 上不通过 RPC 读取的字节数。这记录了 worker 内部调用(e.g. 嵌入在 worker 中的客户端)读取的数据,数据存在于 worker 缓存中或由 worker 从 UFS 获取' +Cluster.BytesReadDirectThroughput: + '汇总在所有 worker 上不通过 RPC 读取字节的吞吐量。这记录了 worker 内部调用(e.g. 嵌入在 worker 中的客户端)读取的数据,数据存在于 worker 缓存中或由 worker 从 UFS 获取' +Cluster.BytesReadDomain: + '从所有 worker 通过域套接字读取的总字节数' +Cluster.BytesReadDomainThroughput: + '通过域套接字从所有 worker 每分钟读取字节的吞吐量' +Cluster.BytesReadLocal: + '由所有客户端报告的短路读取的总字节数' +Cluster.BytesReadLocalThroughput: + '由所有客户端报告的每分钟短路读取字节的吞吐量' +Cluster.BytesReadPerUfs: + '所有 worker 从特定 UFS 读取的字节数总和' +Cluster.BytesReadRemote: + '从所有 worker 通过网络(RPC)读取的总字节数。数据存在于 worker 存储中,或者由 worker 从 UFS 获取。这不包括本地短路读和域套接字读' +Cluster.BytesReadRemoteThroughput: + '从所有 worker 通过网络(RPC 调用)每分钟读取的字节数吞吐量。数据存在于 worker 存储中,或者由 worker 从 UFS 获取。这不包括短路本地读取和域套接字读取' +Cluster.BytesReadUfsAll: + '所有 worker 从所有 UFS 读取的字节数总和' +Cluster.BytesReadUfsThroughput: + '所有 worker 从所有 UFS 每分钟读取的字节数吞吐量' +Cluster.BytesWrittenDomain: + '通过域套接字写入所有 worker 的字节数总和' +Cluster.BytesWrittenDomainThroughput: + '通过域套接字向所有 worker 每分钟写入字节的吞吐量' +Cluster.BytesWrittenLocal: + '所有客户端短路写入到本地 worker 数据存储的字节数总和' +Cluster.BytesWrittenLocalThroughput: + '所有客户端每分钟写入本地 worker 数据存储字节的吞吐量' +Cluster.BytesWrittenPerUfs: + '所有 worker 向特定的 Alluxio UFS 写入的字节数总和' +Cluster.BytesWrittenRemote: + '通过网络(RPC)写入 worker 的字节数总和。数据被写入 worker 存储,或者由 worker 写入底层 UFS。其中不包括短路本地写入和域套接字写入' +Cluster.BytesWrittenRemoteThroughput: + '通过网络(RPC)每分钟向 worker 写入字节的吞吐量。数据被写入 worker 存储,或者由 worker 写入底层 UFS。其中不包括短路本地写入和域套接字写入' +Cluster.BytesWrittenUfsAll: + '所有 worker 向所有 UFS 写入的字节数总和' +Cluster.BytesWrittenUfsThroughput: + '所有 worker 每分钟向所有 UFS 写入字节的吞吐量' +Cluster.CacheHitRate: + '缓存命中率:(#从缓存读取的字节数)/(#请求的字节数)' +Cluster.CapacityFree: + 'Alluxio 所有 worker 上所有层的总空闲字节数' +Cluster.CapacityTotal: + 'Alluxio 所有 worker 
上所有层的总容量(以字节为单位)' +Cluster.CapacityUsed: + 'Alluxio 所有 worker 上所有层的使用字节总数' +Cluster.LeaderId: + '展示当前 primary master id' +Cluster.LeaderIndex: + '当前 primary master 的序号' +Cluster.LostWorkers: + '集群内丢失的 worker 总数' +Cluster.RootUfsCapacityFree: + 'Alluxio 根 UFS 的空闲容量(以字节为单位)' +Cluster.RootUfsCapacityTotal: + 'Alluxio 根 UFS 的总容量(以字节为单位)' +Cluster.RootUfsCapacityUsed: + 'Alluxio 根 UFS 的使用容量(以字节为单位)' +Cluster.Workers: + '集群内活跃的 worker 总数' diff --git a/docs/_data/table/cn/fuse-metrics.yml b/docs/_data/table/cn/fuse-metrics.yml new file mode 100644 index 000000000000..2519ff294603 --- /dev/null +++ b/docs/_data/table/cn/fuse-metrics.yml @@ -0,0 +1,6 @@ +Fuse.CachedPathCount: + '缓存的 Alluxio 路径映射的总数。这个值小于或等于 alluxio.fuse.cached.paths.max' +Fuse.ReadWriteFileCount: + '当前被打开的读写文件数量' +Fuse.TotalCalls: + 'JNI FUSE 操作调用的吞吐量。此指标表明 Alluxio Fuse 应用处理请求的繁忙程度' diff --git a/docs/_data/table/cn/master-metrics.yml b/docs/_data/table/cn/master-metrics.yml new file mode 100644 index 000000000000..761ef8319034 --- /dev/null +++ b/docs/_data/table/cn/master-metrics.yml @@ -0,0 +1,362 @@ +Master.AbsentCacheHits: + 'Absent cache(记录不存在的路径)的缓存命中次数' +Master.AbsentCacheMisses: + 'Absent cache(记录不存在的路径)的缓存未命中次数' +Master.AbsentCacheSize: + 'Absent cache(记录不存在的路径)的大小' +Master.AbsentPathCacheQueueSize: + 'Alluxio 维护了一个Absent cache(记录不存在的路径),这是正在处理的 UFS 路径数量。' +Master.AsyncPersistCancel: + '已取消的 AsyncPersist 操作数量' +Master.AsyncPersistFail: + '失败的 AsyncPersist 操作数量' +Master.AsyncPersistFileCount: + 'AsyncPersist 操作创建的文件数量' +Master.AsyncPersistFileSize: + 'AsyncPersist 操作创建的文件总大小' +Master.AsyncPersistSuccess: + '成功的 AsyncPersist 操作数量' +Master.AuditLogEntriesSize: + '审核日志条目队列的大小' +Master.BlockHeapSize: + '数据块元数据占 JVM 堆大小的估计值' +Master.BlockReplicaCount: + 'Alluxio 中块副本的总数' +Master.CompleteFileOps: + 'CompleteFile 操作的总数' +Master.CompletedOperationRetryCount: + '已由客户端重试的完成操作总数' +Master.CreateDirectoryOps: + '这个指标通过汇总 Master.RocksBlockEstimatedMemUsage 和 Master.RocksInodeEstimatedMemUsage 的值,给出了 RocksDB 
内存使用总量的估计值' +Master.CreateFileOps: + 'CreateFile 操作的总数' +Master.DeletePathOps: + 'Delete 操作的总数' +Master.DirectoriesCreated: + 'CreateDirectory 操作的总数' +Master.EdgeCacheEvictions: + '从缓存中删除的边(inode 元数据)总数。边缓存负责管理从(parentId,childName)到 childId 的映射' +Master.EdgeCacheHits: + '边(inode 元数据)缓存的命中总数。边缓存负责管理从(parentId,childName)到 childId 的映射' +Master.EdgeCacheLoadTimes: + '导致缓存未命中的边(inode 元数据)缓存的总加载时间。边缓存负责管理从(parentId,childName)到 childId 的映射' +Master.EdgeCacheMisses: + '边(inode 元数据)缓存的未命中总数。边缓存负责管理从(parentId,childName)到 childId 的映射' +Master.EdgeCacheSize: + '缓存的边(inode 元数据)总数。边缓存负责管理从(parentId,childName)到 childId 的映射' +Master.EdgeLockPoolSize: + 'Edge 锁池的大小' +Master.EmbeddedJournalSnapshotDownloadGenerate: + '描述从集群中的其他主机下载日志快照所需的时间。只有在使用嵌入式日志时有效。使用此指标可以确定 Alluxio 主机之间是否存在潜在的通信瓶颈' +Master.EmbeddedJournalSnapshotGenerateTimer: + '描述在此主机上生成本地日志快照所需的时间。只有在使用嵌入式日志时有效。使用此指标可以测量 Alluxio 快照生成的性能' +Master.EmbeddedJournalSnapshotInstallTimer: + '描述从另一个主机安装下载的日志快照所需的时间。只有在使用嵌入式日志时有效。使用此指标可以确定 Alluxio 在从 leader 安装快照时的性能。较高的数字可能表示磁盘性能低或 CPU 竞争大' +Master.EmbeddedJournalSnapshotLastIndex: + '表示此主机在最近的本地快照或从集群中另一个主机下载的快照中记录的最新日志索引。只有在使用嵌入式日志时才有效' +Master.EmbeddedJournalSnapshotReplayTimer: + '描述将日志快照重放到主机状态机所需的时间。只有在使用嵌入式日志时才有效。使用此指标确定 Alluxio 重放日志快照文件时的性能。较高的数字可能表示磁盘性能低或 CPU 竞争大' +Master.FileBlockInfosGot: + '成功的 GetFileBlockInfo 操作总数' +Master.FileInfosGot: + '成功的 GetFileInfo 操作总数' +Master.FileSize: + '文件大小分布' +Master.FilesCompleted: + '成功的 CompleteFile 操作总数' +Master.FilesCreated: + '成功的 CreateFile 操作总数' +Master.FilesFreed: + '成功的 FreeFile 操作总数' +Master.FilesPersisted: + '成功持久化的文件总数' +Master.FilesPinned: + '当前固定的文件总数' +Master.FilesToBePersisted: + '当前待持久化的文件总数' +Master.FreeFileOps: + 'FreeFile 操作总数' +Master.GetFileBlockInfoOps: + 'GetFileBlockInfo 操作总数' +Master.GetFileInfoOps: + 'GetFileInfo 操作总数' +Master.GetNewBlockOps: + 'GetNewBlock 操作总数' +Master.InodeCacheEvictions: + '缓存逐出的 inode 总数' +Master.InodeCacheHitRatio: + 'Inode 缓存命中率' +Master.InodeCacheHits: + 'inode(inode 元数据)缓存的命中总数' 
+Master.InodeCacheLoadTimes: + '缓存未命中导致的 inode(inode 元数据)加载次数总数' +Master.InodeCacheMisses: + 'inode 缓存未命中总数' +Master.InodeCacheSize: + 'inode(inode 元数据)缓存的总数' +Master.InodeHeapSize: + 'inode 堆大小的估计值' +Master.InodeLockPoolSize: + 'master inode lock pool 大小' +Master.JobCanceled: + '取消状态异步任务数' +Master.JobCompleted: + '完成状态异步任务数' +Master.JobCount: + '所有状态任务数' +Master.JobCreated: + '创建状态任务数' +Master.JobDistributedLoadCancel: + '取消的 DistributedLoad 操作数' +Master.JobDistributedLoadFail: + '失败的 DistributedLoad 操作数' +Master.JobDistributedLoadFileCount: + 'DistributedLoad 操作的文件数' +Master.JobDistributedLoadFileSizes: + 'DistributedLoad 操作的文件大小' +Master.JobDistributedLoadRate: + '平均 DistributedLoad 加载率' +Master.JobDistributedLoadSuccess: + 'DistributedLoad 操作成功数' +Master.JobFailed: + '失败状态异步任务数' +Master.JobRunning: + '运行中状态异步任务数' +Master.JournalCheckpointWarn: + 'alluxio.master.journal.checkpoint.period.entries,并且最后一个检查点超过了 alluxio.master.journal.checkpoint.warning.threshold.time,则返回 1 以指示需要警告,否则返回 0' +Master.JournalEntriesSinceCheckPoint: + '自上次检查点以来的日志条目数' +Master.JournalFlushFailure: + '日志刷新失败的总数' +Master.JournalFlushTimer: + '日志刷新计时器统计' +Master.JournalFreeBytes: + 'Alluxio 主机的日志磁盘上剩余的字节。此指标仅在 Linux 上使用内置日志时有效。使用此指标监视日志是否耗尽磁盘空间' +Master.JournalFreePercent: + 'Alluxio 主机日志磁盘剩余字节。此指标仅在 Linux 上且使用内置日志时有效。使用此指标监控日志是否有剩余磁盘空间' +Master.JournalGainPrimacyTimer: + '日志获得优先权的计时器统计信息' +Master.JournalLastAppliedCommitIndex: + '最后一个被应用到状态机的 raft 日志索引' +Master.JournalLastCheckPointTime: + '上一个日志检查点时间' +Master.JournalSequenceNumber: + '当前日志序列号' +Master.LastBackupEntriesCount: + '上次主元数据备份中写入的条目总数' +Master.LastBackupRestoreCount: + '当 primary master 初始化元数据时,从备份还原的条目总数' +Master.LastBackupRestoreTimeMs: + '最后一次从备份恢复的过程时间' +Master.LastBackupTimeMs: + '上一次备份的时间' +Master.ListingCacheEvictions: + 'master 节点列表缓存中的总淘汰次数' +Master.ListingCacheHits: + 'master 列表缓存中的命中总数' +Master.ListingCacheLoadTimes: + 'master 列表缓存的总加载时间(以纳秒为单位),这是由缓存未命中所导致的' +Master.ListingCacheMisses: + 'master 列表缓存中的未命中总数' 
+Master.ListingCacheSize: + 'master 列表缓存大小' +Master.LostBlockCount: + '丢失数据块计数' +Master.LostFileCount: + '丢失文件的数量。这个数字是被缓存的,可能与 Master.LostBlockCount 不同步' +Master.MetadataSyncActivePaths: + '所有 InodeSyncStream 实例中正在进行的路径数量' +Master.MetadataSyncExecutor: + 'master 元数据同步执行器线程的指标。Master.MetadataSyncExecutor.submitted 是提交给执行器的任务的计数。Master.MetadataSyncExecutor.completed 是执行器完成的任务的计数。Master.MetadataSyncExecutor.activeTaskQueue 是在执行器中每次添加新任务时计算的活动任务(运行或提交)的幂指数衰减随机容器的数量。最大值是执行过程中任何时候的活动任务的最大数量。Master.MetadataSyncExecutor.running 是执行器正在运行的任务数量。Master.MetadataSyncExecutor.idle 是提交的任务(即在执行前等待队列)闲置的时间。Master.MetadataSyncExecutor.duration 是运行提交的任务的时间。如果执行器是线程池执行器,则 Master.MetadataSyncExecutor.queueSize 是任务队列的大小' +Master.MetadataSyncExecutorQueueSize: + '元数据同步线程池中排队的同步任务数,由 alluxio.master.metadata.sync.executor.pool.size 控制' +Master.MetadataSyncFail: + 'InodeSyncStream 失败的次数,无论是部分失败还是完全失败' +Master.MetadataSyncNoChange: + '未更改 inodes 的 InodeSyncStream 完成数量' +Master.MetadataSyncOpsCount: + '元数据同步操作的数量。每个同步操作对应于一个 InodeSyncStream 实例' +Master.MetadataSyncPathsCancel: + '所有最终被忽略而没被处理的 InodeSyncStream 实例中未决路径的数量' +Master.MetadataSyncPathsFail: + '在元数据同步所有 InodeSyncStream 实例期间失败的路径数量。' +Master.MetadataSyncPathsSuccess: + '从所有 InodeSyncStream 实例同步的路径数量' +Master.MetadataSyncPendingPaths: + '所有活跃 InodeSyncStream 实例中等待元数据同步的的待处理路径数量' +Master.MetadataSyncPrefetchCancel: + '从元数据同步取消的预取任务数量(由于重复的预取请求)' +Master.MetadataSyncPrefetchExecutor: + '关于主元数据同步预取执行线程的指标。Master.MetadataSyncPrefetchExecutor.submitted 是提交给执行器的任务的计数器。Master.MetadataSyncPrefetchExecutor.completed 是由执行器完成的任务的计数器。Master.MetadataSyncPrefetchExecutor.activeTaskQueue 是在执行器上运行或提交的活动任务的指数衰减随机容器,每次向执行器添加新任务时计算。最大值是执行期间任意时间内的最大活动任务数。Master.MetadataSyncPrefetchExecutor.running 是执行器正在运行的任务数。Master.MetadataSyncPrefetchExecutor.idle 是提交的任务(即等待队列中之前执行的时间)的空闲时间。Master.MetadataSyncPrefetchExecutor.duration 是运行提交的任务的时间。如果执行器是线程池执行器,则 Master.MetadataSyncPrefetchExecutor.queueSize 是任务队列的大小。' +Master.MetadataSyncPrefetchExecutorQueueSize: + 
'元数据同步线程池中排队的预取任务数,由 alluxio.master.metadata.sync.ufs.prefetch.pool.size 控制' +Master.MetadataSyncPrefetchFail: + '元数据同步中失败的预取工作数量' +Master.MetadataSyncPrefetchOpsCount: + '由预取线程池处理的预取操作数量' +Master.MetadataSyncPrefetchPaths: + '元数据同步预取任务获取的 UFS 路径总数' +Master.MetadataSyncPrefetchRetries: + '元数据同步预取任务的重试获取次数' +Master.MetadataSyncPrefetchSuccess: + '元数据同步预取任务的成功获取次数' +Master.MetadataSyncSkipped: + '由于 Alluxio 元数据比 alluxio.user.file.metadata.sync.interval 更新而跳过的 InodeSyncStream 数量' +Master.MetadataSyncSuccess: + 'InodeSyncStream 成功次数' +Master.MetadataSyncTimeMs: + '所有 InodeSyncStream 实例存在总时间' +Master.MetadataSyncUfsMount.: + '对给定 UFS 挂载点进行 UFS sync 操作的次数' +Master.MigrateJobCancel: + 'MigrateJob 取消次数' +Master.MigrateJobFail: + 'MigrateJob 失败次数' +Master.MigrateJobFileCount: + 'MigrateJob 文件数' +Master.MigrateJobFileSize: + 'MigrateJob 文件体积之和' +Master.MigrateJobSuccess: + 'MigrateJob 操作成功次数' +Master.MountOps: + 'Mount 操作次数' +Master.NewBlocksGot: + 'GetNewBlock 操作成功次数' +Master.PathsDeleted: + 'Delete 操作成功次数' +Master.PathsMounted: + 'Mount 操作成功次数' +Master.PathsRenamed: + 'Rename 操作成功次数' +Master.PathsUnmounted: + 'Unmount 操作成功次数' +Master.RenamePathOps: + 'Rename 操作次数' +Master.ReplicaMgmtActiveJobSize: + '活跃块复制/逐出任务的数。这些任务由 master 创建,以维护块副本因子。该值带有一定延迟,是估计值' +Master.RocksBlockBackgroundErrors: + 'RocksDB 块表。背景错误累积数' +Master.RocksBlockBlockCacheCapacity: + 'RocksDB 块表。块缓存容量' +Master.RocksBlockBlockCachePinnedUsage: + 'RocksDB 块表。固定条目内存体积' +Master.RocksBlockBlockCacheUsage: + 'RocksDB 块表。存储在块缓存中的条目的内存大小' +Master.RocksBlockCompactionPending: + 'RocksDB 块表。如果有至少一个压缩操作正在等待,则此指标为 1;否则,此指标为 0' +Master.RocksBlockCurSizeActiveMemTable: + 'RocksDB 块表。活跃 MemTable 的近似字节大小' +Master.RocksBlockCurSizeAllMemTables: + 'RocksDB 块表。活跃的、未刷新且不可变的,和固定住不可变的 MemTable 的以字节为单位的近似大小。固定不可变内存表是被保留在内存中的刷新内存表,用于在内存中保留写入历史记录' +Master.RocksBlockEstimateNumKeys: + 'RocksDB 块表。活跃和未刷新地不可变 MemTable 以及存储中总键数的估计值' +Master.RocksBlockEstimatePendingCompactionBytes: + 'RocksDB 
块表。估计一次压缩需要在磁盘上重写的总字节数,以使所有层降到目标大小之下。换句话说,这个指标与层压缩中的写入放大率有关。因此,这个指标对层压缩以外的压缩是无效的。' +Master.RocksBlockEstimateTableReadersMem: + 'RocksDB inode 表。以字节为单位估计用于读取 SST 表的内存,不包括块缓存中使用的内存(e.g. 过滤器和索引块)。如果过滤器和索引不在块缓存中维护,此指标记录迭代器使用的内存以及过滤器和索引。此指标基本上反映了读取数据时块缓存外使用的内存' +Master.RocksBlockEstimatedMemUsage: + 'RocksDB块表。这个指标通过聚合 Master.RocksBlockBlockCacheUsage、Master.RocksBlockEstimateTableReadersMem、Master.RocksBlockCurSizeAllMemTables 和 Master.RocksBlockBlockCachePinnedUsage 的值来估计 RockDB 块表的内存使用情况。' +Master.RocksBlockLiveSstFilesSize: + 'RocksDB块表。属于最新 LSM 树的所有 SST 文件以字节为单位的总大小' +Master.RocksBlockMemTableFlushPending: + 'RocksDB 块表。如果 Memtable 刷新操作正在等待,则此指标为 1;否则为 0' +Master.RocksBlockNumDeletesActiveMemTable: + 'RocksDB 块表。活跃 Memtable 中的删除条目总数' +Master.RocksBlockNumDeletesImmMemTables: + 'RocksDB 块表。未刷新不可变 MemTable 中删除条目的总数' +Master.RocksBlockNumEntriesActiveMemTable: + 'RocksDB 块表。活跃 MemTable 中的条目总数' +Master.RocksBlockNumEntriesImmMemTables: + 'RocksDB 块表。未刷新不可变 MemTable 中的条目总数' +Master.RocksBlockNumImmutableMemTable: + 'RocksDB 块表。尚未刷新的不可变 MemTable 的数量' +Master.RocksBlockNumLiveVersions: + 'RocksDB inode 表。存活版本数。存活版本较多时,通常意味着更多 SST 文件被迭代器或未完成的压缩保留而未被删除' +Master.RocksBlockNumRunningCompactions: + 'RocksDB 块表。当前正在运行的压缩数量' +Master.RocksBlockNumRunningFlushes: + 'RocksDB 块表。当前正在运行的刷新数量。' +Master.RocksBlockSizeAllMemTables: + 'RocksDB 块表。所有 MemTable 的大小' +Master.RocksBlockTotalSstFilesSize: + 'RocksDB 块表。所有 SST 文件以字节为单位的总大小' +Master.RocksInodeBackgroundErrors: + 'RocksDB inode 表。后台错误累积数' +Master.RocksInodeBlockCacheCapacity: + 'RocksDB inode 表。 块缓存容量' +Master.RocksInodeBlockCachePinnedUsage: + 'RocksDB inode 表。固定键内存体积' +Master.RocksInodeBlockCacheUsage: + 'RocksDB inode 表。这是用来描述存储在块缓存中的条目内存大小的指标' +Master.RocksInodeCompactionPending: + 'RocksDB inode 表。 如果至少有一个压缩操作正在等待则该指标为 1;否则,该指标为 0' +Master.RocksInodeCurSizeActiveMemTable: + 'RocksDB inode 表。活跃 MemTable 以字节为单位的近似大小' +Master.RocksInodeCurSizeAllMemTables: + 'RocksDB inode 表。活跃和未刷新不可变 MemTable 以字节为单位的近似大小' 
+Master.RocksInodeEstimateNumKeys: + 'RocksDB inode 表。活跃和未刷新不可变 MemTable 以及存储中所有键的估计数量' +Master.RocksInodeEstimatePendingCompactionBytes: + 'RocksDB 块表。估计一次压缩为了将所有层降到目标大小以下需要在磁盘上重写的总字节数。换句话说,这个指标与层压缩中的写入放大率有关。因此,这个指标对层压缩以外的压缩是无效的' +Master.RocksInodeEstimateTableReadersMem: + 'RocksDB inode 表。估计用于读取SST表的字节数,不包括用于块缓存的内存(e.g. 过滤器和索引块)。如果过滤器和索引不在块缓存中维护,则这个指标记录了迭代器以及过滤器和索引所使用的内存。这个指标基本上反应了在块缓存之外用于读取数据的内存。' +Master.RocksInodeEstimatedMemUsage: + 'RocksDB 块表。这个指标通过聚合 Master.RocksInodeBlockCacheUsage、Master.RocksInodeEstimateTableReadersMem、Master.RocksInodeCurSizeAllMemTables 和 Master.RocksInodeBlockCachePinnedUsage 的值,估计了 RockDB Inode 表的内存使用情况' +Master.RocksInodeLiveSstFilesSize: + 'RocksDB inode 表。属于最新 LSM 树的所有 SST 文件以字节为单位的总大小' +Master.RocksInodeMemTableFlushPending: + 'RocksDB inode 表。如果 MemTable 刷新正在等待,则该指标为 1;否则,该指标为 0' +Master.RocksInodeNumDeletesActiveMemTable: + 'RocksDB inode 表。活跃 MemTable 中删除条目的总数' +Master.RocksInodeNumDeletesImmMemTables: + 'RocksDB inode table. 未刷新不可变 MemTable 中删除条目的总数' +Master.RocksInodeNumEntriesActiveMemTable: + 'RocksDB inode 表。活跃 MemTable 中的总条目数' +Master.RocksInodeNumEntriesImmMemTables: + 'RocksDB inode 表。未刷新不可变 MemTable 中的总条目数' +Master.RocksInodeNumImmutableMemTable: + 'RocksDB inode 表。尚未刷新的不可变 MemTable 的数量' +Master.RocksInodeNumLiveVersions: + 'RocksDB inode 表。活跃版本的数量。更多的活跃版本通常意味着被迭代器或未完成的压缩保留的不被删除的 SST 文件更多' +Master.RocksInodeNumRunningCompactions: + 'RocksDB inode 表。当前正在进行的压缩数量' +Master.RocksInodeNumRunningFlushes: + 'RocksDB inode 表。当前正在进行的刷新数量' +Master.RocksInodeSizeAllMemTables: + 'RocksDB inode 表。当前活跃的,为刷新不变的,以及固定不变的 MemTable 以字节为单位的近似体积。固定不变的 MemTable 是保留在内存中用于维护内存写入历史更新过的 MemTable' +Master.RocksInodeTotalSstFilesSize: + 'RocksDB inode 表。所有 SST 文件以字节为单位的总体积。' +Master.RocksTotalEstimatedMemUsage: + '这个指标通过汇总 Master.RocksBlockEstimatedMemUs age 和 Master.RocksInodeEstimatedMemUsage 的值,给出了 RocksDB 内存使用总量' +Master.RoleId: + '展示 master role id' +Master.RpcQueueLength: + 'master RPC 队列的长度。使用这个指标来监控 master 上的 RPC 压力' 
+Master.RpcThreadActiveCount: + '在 master RPC 执行器线程池中正在积极执行任务的线程数量。使用这个指标来监控 master 上的 RPC 压力' +Master.RpcThreadCurrentCount: + '当前 master RPC 执行器线程池中的线程数。使用这个指标来监控主服务器上的 RPC 压力' +Master.SetAclOps: + 'SetAcl 操作总次数' +Master.SetAttributeOps: + 'SetAttribute 操作总次数' +Master.ToRemoveBlockCount: + '要从 worker 中移除的块副本数量。如果 1 个块要从 2 个 worker 中移除,会被记为 2 个' +Master.TotalPaths: + 'Alluxio 命名空间中的文件和目录总数' +Master.TotalRpcs: + 'master RPC 调用的吞吐量。这个指标表明 master 服务客户端请求的繁忙程度' +Master.UfsJournalCatchupTimer: + '日志追赶的定时器统计只在使用 Ufs 日志时有效。它提供了一个 standby master 赶上 master 所需时间的概要,如果 master 转换时间过长则应进行监控' +Master.UfsJournalFailureRecoverTimer: + 'UFS 日志故障恢复的定时器统计数据' +Master.UfsJournalInitialReplayTimeMs: + '启动时 UFS 日志初始回放过程的持续时间。只在使用 UFS 日志时有效。它记录了第一次日志回放的持续时间。使用这个指标来监测你的 master 启动时间是否过长' +Master.UfsStatusCacheChildrenSize: + 'UFS 文件元数据缓存总量。该缓存在元数据同步期间使用' +Master.UfsStatusCacheSize: + '正在由元数据同步预取线程池处理的 Alluxio 路径总数' +Master.UniqueBlocks: + 'Alluxio 中数据块总数(不算副本)' +Master.UnmountOps: + 'Unmount 操作总次数' diff --git a/docs/_data/table/cn/process-metrics.yml b/docs/_data/table/cn/process-metrics.yml new file mode 100644 index 000000000000..0e5804baa1af --- /dev/null +++ b/docs/_data/table/cn/process-metrics.yml @@ -0,0 +1,2 @@ +Process.pool.direct.mem.used: + '已使用的直接内存' diff --git a/docs/_data/table/cn/server-metrics.yml b/docs/_data/table/cn/server-metrics.yml new file mode 100644 index 000000000000..9604d787839d --- /dev/null +++ b/docs/_data/table/cn/server-metrics.yml @@ -0,0 +1,6 @@ +Server.JvmPauseMonitorInfoTimeExceeded: + 'JVM 暂停时间长于 alluxio.jvm.monitor.info.threshold 阈值的总次数' +Server.JvmPauseMonitorTotalExtraTime: + 'JVM 暂停的总时间,JVM暂停通常由GC或jstack等事件引发' +Server.JvmPauseMonitorWarnTimeExceeded: + 'JVM 暂停时间长于 alluxio.jvm.monitor.warn.threshold 阈值的总次数' diff --git a/docs/_data/table/cn/worker-metrics.yml b/docs/_data/table/cn/worker-metrics.yml new file mode 100644 index 000000000000..21f592b1c931 --- /dev/null +++ b/docs/_data/table/cn/worker-metrics.yml @@ -0,0 +1,130 @@ +Worker.ActiveClients: 
+ '正在活跃地读取或写入此 worker 的客户端数量' +Worker.ActiveRpcReadCount: + '此 worker 管理的读 RPC 数量' +Worker.ActiveRpcWriteCount: + '此 worker 管理的写 RPC 数量' +Worker.BlockReaderCompleteTaskCount: + '已经完成执行的读任务的近似值' +Worker.BlockReaderThreadActiveCount: + 'reader 线程池中正在活跃执行任务的读线程数量的近似值' +Worker.BlockReaderThreadCurrentCount: + '此 reader 线程池中的读线程数' +Worker.BlockReaderThreadMaxCount: + 'reader 线程池中读线程允许的最大数量' +Worker.BlockRemoverBlocksRemovedCount: + '此 worker 中被 asynchronous block remover 成功移除的块的总数量' +Worker.BlockRemoverRemovingBlocksSize: + 'asynchronous block remover 正在从此 worker 移除的块大小' +Worker.BlockRemoverTryRemoveBlocksSize: + 'asynchronous block remover 正要从此 worker 移除的块大小' +Worker.BlockRemoverTryRemoveCount: + 'asynchronous block remover 尝试从此 worker 移除的块大小' +Worker.BlockSerializedCompleteTaskCount: + '完成执行的块序列化任务完成总量近似值' +Worker.BlockSerializedThreadActiveCount: + 'serialized 线程池中正在活跃执行任务的 block serialized 线程近似数量' +Worker.BlockSerializedThreadCurrentCount: + '此 serialized 线程池中 block serialized 线程数量' +Worker.BlockSerializedThreadMaxCount: + 'serialized 线程池中 block serialized 线程允许的最大数量' +Worker.BlockWriterCompleteTaskCount: + '已经完成执行的 block serialized 任务的近似值' +Worker.BlockWriterThreadActiveCount: + 'writer 线程池中正在活跃执行任务的写线程数量的近似值' +Worker.BlockWriterThreadCurrentCount: + '此 writer 线程池中的写线程数' +Worker.BlockWriterThreadMaxCount: + 'writer 线程池中写线程允许的最大数量' +Worker.BlocksAccessed: + '此 worker 中数据块被访问的总次数' +Worker.BlocksCached: + '一个 Alluxio worker 中被用于缓存数据的块总数' +Worker.BlocksCancelled: + '此 worker 中废弃的临时块总量' +Worker.BlocksDeleted: + '此 worker 中被外部请求删除的块总量' +Worker.BlocksEvicted: + '此 worker 中被驱逐的块总量' +Worker.BlocksEvictionRate: + '此 worker 的块驱逐率' +Worker.BlocksLost: + '此 worker 丢失块总量' +Worker.BlocksPromoted: + '此 worker 中,任何一个块被移到新层的总次数' +Worker.BlocksReadLocal: + '通过此 worker 本地读的数据块总数' +Worker.BlocksReadRemote: + '通过此 worker 远程读的数据块总数' +Worker.BlocksReadUfs: + '通过此 worker 从 UFS 读取的数据块总数' +Worker.BytesReadDirect: + '此 worker 中没有外部 RPC 参与的总字节数。数据存在于 worker 存储中或者由此 worker 从底层 UFS 获取。此指标记录了 
worker 内部调用读取的数据(e.g. 嵌入在此 worker 中的客户端)' +Worker.BytesReadDirectThroughput: + '此 worker 中没有涉及外部 RPC 的字节读取吞吐量。数据存在于 worker 存储中或由该 worker 从底层 UFS 中获取。这记录了 worker 内部调用(e.g. 位于此 worker 中的客户端)读取的数据' +Worker.BytesReadDomain: + '此 worker 通过域套接字读取的总字节数' +Worker.BytesReadDomainThroughput: + '此 worker 通过域套接字读取字节的吞吐量' +Worker.BytesReadPerUfs: + '此 worker 从特定 UFS 读取的总字节数' +Worker.BytesReadRemote: + '通过网络(RPC)远程读取此 worker 的字节总数。数据存在于 worker 存储中或由该 worker 从底层 UFS 获取。这不包括短路本地读取和域套接字读取' +Worker.BytesReadRemoteThroughput: + '这是一项衡量通过网络(RPC)从此 worker 读取的字节数的吞吐量的指标。数据存在于 worker 存储中,或者由该 worker 从底层 UFS 中获取。这不包括短路本地读取和域套接字读取' +Worker.BytesReadUfsThroughput: + '由此 worker 从 UFS 读取字节的吞吐量' +Worker.BytesWrittenDirect: + '不涉及外部 RPC写入此 worker 的总字节数。数据写入 worker 存储或由此 worker 写入下层 UFS。这记录了 worker 内部调用(e.g. 嵌入在 此 worker 中的客户端)写入的数据' +Worker.BytesWrittenDirectThroughput: + '不涉及外部 RPC 写入此 worker 的字节吞吐量。数据写入 worker 存储或由此 worker 写入下层 UFS。这记录了 worker 内部调用(e.g. 嵌入在此 worker 中的客户端)写入的数据' +Worker.BytesWrittenDomain: + '通过域套接字写入此 worker 的总字节数' +Worker.BytesWrittenDomainThroughput: + '通过域套接字写入此 worker 的吞吐量' +Worker.BytesWrittenPerUfs: + '此 worker 向特定 UFS 写入的总字节数' +Worker.BytesWrittenRemote: + '通过网络(RPC)写入此 worker 的总字节数。数据写入 worker 存储或由此 worker 写入下层 UFS。这不包括短路本地写入和域套接字写入' +Worker.BytesWrittenRemoteThroughput: + '通过网络(RPC)写入此 worker 的字节写入吞吐量。数据写入 worker 存储或由此 worker 写入下层 UFS。这不包括短路本地写入和域套接字写入' +Worker.BytesWrittenUfsThroughput: + '此 worker 向所有 Alluxio UFS 写入字节的吞吐量' +Worker.CacheBlocksSize: + '通过缓存请求缓存的字节量' +Worker.CacheFailedBlocks: + '此 worker 缓存块失败数量' +Worker.CacheManagerCompleteTaskCount: + '已经完成执行的块缓存任务的近似量' +Worker.CacheManagerThreadActiveCount: + 'cache manager 线程池中正在活跃执行任务的块缓存线程数量的近似值' +Worker.CacheManagerThreadCurrentCount: + '此 cache manager 线程池中的块缓存线程数' +Worker.CacheManagerThreadMaxCount: + 'cache manager 线程池中块缓存线程允许的最大数量' +Worker.CacheManagerThreadQueueWaitingTaskCount: + '此 worker 中 cache manager 线程池中工作队列中等待的任务数,受 alluxio.worker.network.async.cache.manager.queue.max 的限制。' 
+Worker.CacheRemoteBlocks: + '此 worker 需要从远程源缓存的块的总数' +Worker.CacheRequests: + '此 worker 收到的缓存请求总数' +Worker.CacheRequestsAsync: + '此 worker 收到的异步缓存请求的总数' +Worker.CacheRequestsSync: + '此 worker 收到的同步缓存请求的总数' +Worker.CacheSucceededBlocks: + '此 worker 中的缓存成功的块的总数' +Worker.CacheUfsBlocks: + '此 worker 中需要从本地源缓存的块的总数' +Worker.CapacityFree: + '此 Alluxio worker 的所有层级上的总空闲字节' +Worker.CapacityTotal: + '此 Alluxio worker 在所有层次上以字节为单位的总容量' +Worker.CapacityUsed: + '此 Alluxio worker 所有层级上使用的总字节数' +Worker.RpcQueueLength: + 'worker RPC 队列的长度。用此指标监视 worker 的 RPC 压力' +Worker.RpcThreadActiveCount: + '此 worker RPC 执行程序线程池中正在执行任务的线程数。用此指标监视 worker 的 RPC 压力' +Worker.RpcThreadCurrentCount: + '此 worker RPC 执行器线程池中的线程数。用此指标监视 worker 的 RPC 压力' diff --git a/docs/cn/reference/Metrics-List.md b/docs/cn/reference/Metrics-List.md new file mode 100644 index 000000000000..1f615e27bd5c --- /dev/null +++ b/docs/cn/reference/Metrics-List.md @@ -0,0 +1,268 @@ +--- +layout: global +title: List of Metrics +group: Reference +priority: 1 +--- + +* Table of Contents +{:toc} + +在 Alluxio 中,有两种类型的指标,集群范围内的合计指标和每个进程的详细指标。 + +* 集群指标由 leading master 收集和计算的,并且在 web UI 下的指标标签下展示。 + 这些指标旨在提供 Alluxio 服务的集群状态以及数据与元数据总量的快照。 + +* 进程指标由每个 Alluxio 进程收集,并通过任何配置的接收器以机器可读的格式暴露出来。 + 进程指标高度详细,旨在被第三方监测工具使用。 + 用户可以通过细粒度的数据面板查看每个指标的时间序列图。 + 比如数据传输量或 RPC 调用次数。 + +Alluxio 的 master 节点指标具有以下格式: + +``` +Master.[metricName].[tag1].[tag2]... +``` + +Alluxio 的非 master 节点指标具有以下格式 + +``` +[processType].[metricName].[tag1].[tag2]...[hostName] +``` + +通常情况下,Alluxio 会为每一种 RPC 调用生成一个指标,无论是调用 Alluxio 还是调用下层存储。 + +标签是指标的附加元数据,如用户名或存储位置。 +标签可用于进一步筛选或聚合各种特征。 + +## 集群指标 + +Worker 和 client 通过心跳包将指标数据发送到 Alluxio master。心跳间隔分别由 `alluxio.master.worker.heartbeat.interval` 和 `alluxio.user.metrics.heartbeat.interval` 属性定义。 + +字节指标是来自 worker 或 client 的聚合值。字节吞吐量指标是在 master 上计算的。 +字节吞吐量的值等于字节指标计数器值除以指标记录时间,并以字节/分钟的形式呈现。 + + + +{% for item in site.data.table.cluster-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.cluster-metrics[item.metricName] }}
+ +## 进程指标 + +所有 Alluxio 服务器和客户端进程共享的指标。 + + + +{% for item in site.data.table.process-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.process-metrics[item.metricName] }}
+ +## 服务器指标 + +Alluxio 服务器共享的指标。 + + + +{% for item in site.data.table.server-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.server-metrics[item.metricName] }}
+ +## Master 指标 + +默认 Master 指标: + + + +{% for item in site.data.table.master-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.master-metrics[item.metricName] }}
+ +动态生成的 Master 指标: + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| Master.CapacityTotalTier{TIER_NAME} | Alluxio 文件系统中层 {TIER_NAME} 以字节为单位的总容量 | +| Master.CapacityUsedTier{TIER_NAME} | Alluxio 文件系统中层 {TIER_NAME} 以字节为单位已使用的容量 | +| Master.CapacityFreeTier{TIER_NAME} | Alluxio 文件系统中层 {TIER_NAME} 以字节为单位未使用的容量 | +| Master.UfsSessionCount-Ufs:{UFS_ADDRESS} | 当前打开并连接到给定 {UFS_ADDRESS} 的 UFS 会话数 | +| Master.{UFS_RPC_NAME}.UFS:{UFS_ADDRESS}.UFS_TYPE:{UFS_TYPE}.User:{USER} | 当前 master 完成的 UFS RPC 操作细节 | +| Master.PerUfsOp{UFS_RPC_NAME}.UFS:{UFS_ADDRESS} | 当前主 master 在 UFS {UFS_ADDRESS} 上运行的 UFS 操作 {UFS_RPC_NAME} 的总数 | +| Master.{LEADING_MASTER_RPC_NAME} | 主 master 上暴露的 RPC 调用的持续时间统计信息 | + +## Worker 指标 + +默认 worker 指标: + + + +{% for item in site.data.table.worker-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.worker-metrics[item.metricName] }}
+ +动态的 worker 指标: + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| Worker.UfsSessionCount-Ufs:{UFS_ADDRESS} | 当前打开并连接到给定 {UFS_ADDRESS} 的 UFS 会话数 | +| Worker.{RPC_NAME} | worker 上暴露的 RPC 调用的持续时间统计信息 | + +## Client 指标 + +每个客户端度量将使用其本地主机名或配置的 `alluxio.user.app.id` 进行记录。 +如果配置了 `alluxio.user.app.id`,多个客户端可以组合成一个逻辑应用。 + + + +{% for item in site.data.table.client-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.client-metrics[item.metricName] }}
+ +## Fuse 指标 + +Fuse 是长期运行的 Alluxio 客户端。 +根据启动方式,Fuse 指标将显示为: +* 当文件系统客户端在独立的 AlluxioFuse 进程中启动时,显示为客户端指标。 +* 当 Fuse 客户端嵌入在 AlluxioWorker 进程中时,显示为 worker 指标。 + +Fuse 指标包括: + + + +{% for item in site.data.table.fuse-metrics %} + + + + + +{% endfor %} +
名称类型描述
{{ item.metricName }}{{ item.metricType }}{{ site.data.table.cn.fuse-metrics[item.metricName] }}
+ +Fuse 读/写文件数量可用作 Fuse 应用程序压力的指标。 +如果在短时间内发生大量并发读/写操作,则每个读/写操作可能需要更长的时间来完成。 + +当用户或应用程序在 Fuse 挂载点下运行文件系统命令时,该命令将由操作系统处理和转换,并触发在 [AlluxioFuse](https://github.com/Alluxio/alluxio/blob/db01aae966849e88d342a71609ab3d910215afeb/integration/fuse/src/main/java/alluxio/fuse/AlluxioJniFuseFileSystem.java) 中暴露的相关 Fuse 操作。每个操作被调用的次数以及每次调用的持续时间将使用动态指标名称 `Fuse.` 记录。 + + +重要的 Fuse 指标包括: + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| Fuse.readdir | 列出目录的持续时间指标 | +| Fuse.getattr | 获取文件元数据的持续时间指标 | +| Fuse.open | 打开文件进行读或覆写的持续时间指标 | +| Fuse.read | 读取文件的一部分的持续时间指标 | +| Fuse.create | 为了写入创建文件的持续时间指标 | +| Fuse.write | 写入文件的持续时间指标 | +| Fuse.release | 在读取或写入后关闭文件的持续时间指标。请注意,释放是异步的,因此 FUSE 线程不会等待释放完成 | +| Fuse.mkdir | 创建目录的持续时间指标 | +| Fuse.unlink | 删除文件或目录的持续时间指标 | +| Fuse.rename | 重命名文件或目录的持续时间指标 | +| Fuse.chmod | 更改文件或目录模式的持续时间指标 | +| Fuse.chown | 修改文件或目录的用户和/或组所有权的持续时间指标 | + +Fuse相关的指标包括: +* `Client.TotalRPCClients` 显示用于连接到或可连接到 master 或 worker 进行操作的 RPC 客户端的总数。 +* 带有 `Direct` 关键字的 worker 指标。当 Fuse 嵌入到 worker 进程中时,它可以通过 worker 内部 API 从该 worker 读取/写入。 +相关指标以 `Direct` 结尾。例如,`Worker.BytesReadDirect` 显示该 worker 为其嵌入的 Fuse 客户端提供读取的字节数。 +* 如果配置了 `alluxio.user.block.read.metrics.enabled=true`,则会记录 `Client.BlockReadChunkRemote`。 该指标显示通过 gRPC 从远程 worker 读取数据的持续时间统计。 + +`Client.TotalRPCClients` 和 `Fuse.TotalCalls` 指标是 Fuse 应用程序当前负载的优秀指标。 +如果在 Alluxio Fuse 上运行应用程序(e.g. 
Tensorflow),但这两个指标值比之前低得多,则训练作业可能会卡在 Alluxio 上。 + +## 普通进程指标 + +在每个实例(Master、Worker 或 Client)上收集的指标。 + +### JVM Attributes + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| name | JVM 名称 | +| uptime | JVM 的运行时间 | +| vendor | 当前的 JVM 供应商 | + +### GC 统计 + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| PS-MarkSweep.count | 标记和清除 old gen 的总数 | +| PS-MarkSweep.time | 标记和清除 old gen 的总时间 | +| PS-Scavenge.count | 清除 young gen 总数 | +| PS-Scavenge.time | 清除 young gen 总时间 | + +### 内存使用情况 + +Alluxio 提供整体和详细的内存使用信息。 +每个进程中代码缓存、压缩类空间、元数据空间、PS Eden 空间、PS old gen 以及 PS survivor 空间的详细内存使用信息都会被收集。 + +以下是内存使用指标的子集: + +| 名称 | 描述 | +|------------------------------|-----------------------------------------------------| +| total.committed | 保证可供 JVM 使用的以字节为单位的内存数量 | +| total.init | 可供 JVM 使用的以字节为单位的内存数量 | +| total.max | 以字节为单位的 JVM 可用的最大内存量 | +| total.used | 以字节为单位当前使用的内存大小 | +| heap.committed | 在堆上保证可用的内存大小 | +| heap.init | 初始化时堆上可用的内存量 | +| heap.max | 在堆上可用的最大内存量 | +| heap.usage | 堆上当前正在使用的以 GB 为单位的内存量 | +| heap.used | 堆上当前已经使用过的以 GB 为单位的内存量 | +| pools.Code-Cache.used | 内存池中用于编译和存储本地代码的内存总量 | +| pools.Compressed-Class-Space.used | 内存池中用于类元数据的内存总量 | +| pools.PS-Eden-Space.used | 内存池中用于大多数对象初始分配的内存总量 | +| pools.PS-Survivor-Space.used | 从包含在 Eden space 的垃圾回收中幸存下来的对象的池中使用的内存总量 | + +### 类加载统计 + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| loaded | 加载的类总数 | +| unloaded | 未加载的类总量 | + +### 线程统计 + +| 名称 | 描述 | +|-------------------------|-----------------------------------------------------| +| count | 当前存活线程数 | +| daemon.count | 当前守护线程的数量 | +| peak.count | 存活线程数峰值 | +| total_started.count | 启动线程总数 | +| deadlock.count | 死锁线程总数 | +| deadlock | 与每个线程有关的死锁的调用栈 | +| new.count | 有新状态的线程数 | +| blocked.count | 阻塞态线程数 | +| runnable.count | 可运行状态线程数 | +| terminated.count | 终结态线程数 | +| timed_waiting.count | 定时等待状态的线程数量 | From 
270291c9d3ed169af3ea94917de9617e5da24b2b Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Thu, 12 Jan 2023 01:15:07 +0800 Subject: [PATCH 055/334] [DOCFIX] Update cn version of Architecture doc What changes are proposed in this pull request? Update cn version of Architecture doc. Why are the changes needed? The Chinese overview/Architecture document is not updated with the latest changes, this PR synchronizes these updates. Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. pr-link: Alluxio/alluxio#16687 change-id: cid-7f7de73e5968d5b0095ffc69f126f0ae96e91f4f --- docs/cn/overview/Architecture.md | 182 +++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 docs/cn/overview/Architecture.md diff --git a/docs/cn/overview/Architecture.md b/docs/cn/overview/Architecture.md new file mode 100644 index 000000000000..3795117a54dd --- /dev/null +++ b/docs/cn/overview/Architecture.md @@ -0,0 +1,182 @@ +--- +layout: global +title: 架构 +group: Overview +priority: 2 +--- + +* Table of Contents +{:toc} + +## Architecture Overview + +Alluxio作为大数据和机器学习生态系统中的新增数据访问层,可位于任何持久化存储系统(如Amazon S3、Microsoft Azure 对象存储、Apache HDFS或OpenStack Swift)和计算框架(如Apache Spark、Presto或Hadoop MapReduce)之间,但是Alluxio本身并非持久化存储系统。使用Alluxio作为数据访问层可带来诸多优势: + + +- 对于用户应用和计算框架而言,Alluxio提供的快速存储可以让任务(无论是否在同一计算引擎上运行)进行数据共享,并且在同时将数据缓存在本地计算集群。因此,当数据在本地时,Alluxio可以提供内存级别的数据访问速度;当数据在Alluxio中时,Alluxio将提供计算集群网络带宽级别的数据访问速度。数据只需在第一次被访问时从底层存储系统中读取一次即可。因此,即使底层存储的访问速度较慢,也可以通过Alluxio显著加速数据访问。为了获得最佳性能,建议将 Alluxio与集群的计算框架部署在一起。 + + +- 就底层存储系统而言,Alluxio将大数据应用和不同的存储系统连接起来,因此扩充了能够利用数据的可用工作负载集。由于Alluxio和底层存储系统的集成对于应用程序是透明的,因此任何底层存储都可以通过Alluxio支持数据访问的应用和框架。此外,当同时挂载多个底层存储系统时,Alluxio可以作为任意数量的不同数据源的统一层。 + + +

+Architecture overview +

+ +Alluxio包含三种组件:master、worker和client。一个集群通常包含一个leading master,多个standby master,一个primary job master、多个standby job master,多个worker和多个job workers 。master进程和worker进程通常由系统管理员维护和管理,构成了Alluxio server(服务端)。而应用程序(如Spark或MapReduce作业、Alluxio命令行或FUSE层)通过client(客户端)与Alluxio server进行交互。 + +Alluxio Job Masters和Job Worker可以归为一个单独的功能,称为**Job Service**。Alluxio Job Service是一个轻量级的任务调度框架,负责将许多不同类型的操作分配给Job Worker。这些任务包括: + +- 将数据从UFS(under file system)加载到Alluxio +- 将数据持久化到UFS +- 在Alluxio中复制文件 +- 在UFS间或Alluxio节点间移动或拷贝数据 + +Job service的设计使得所有与Job相关的进程不一定需要与其他Alluxio集群位于同一位置。但是,为了使得RPC和数据传输延迟较低,我们还是建议将job worker与对应的Alluxio worker并置。 + +## Masters + +

+Alluxio masters +

+ +Alluxio包含两种不同类型的master进程。一种是**Alluxio Master**,Alluxio Master服务所有用户请求并记录文件系统元数据修改情况。另一种是**Alluxio Job Master**,这是一种用来调度将在**Alluxio Job Workers**上执行的各种文件系统操作的轻量级调度程序。 + +为了提供容错能力,可以以一个leading master和多个备用master的方式来部署**Alluxio Master**。当leading master宕机时,某个standby master会被选举成为新的leading master。 + +### Leading Master + +Alluxio集群中只有一个leading master,负责管理整个集群的全局元数据,包括文件系统元数据(如索引节点树)、数据块(block)元数据(如block位置)以及worker容量元数据(空闲和已用空间)。leading master只会查询底层存储中的元数据。应用数据永远不会通过master来路由传输。Alluxio client通过与leading master交互来读取或修改元数据。此外,所有worker都会定期向leading master发送心跳信息,从而维持其在集群中的工作状态。leading master通常不会主动发起与其他组件的通信,只会被动响应通过RPC服务发来的请求。此外,leading master还负责将日志写入分布式持久化存储,保证集群重启后能够恢复master状态信息。这组记录被称为日志(Journal)。 + +### Standby Masters + +standby master在不同的服务器上启动,确保在高可用(HA)模式下运行Alluxio时能提供容错。standby master读取leading master的日志,从而保证其master状态副本是最新的。standby master还编写日志检查点, 以便将来能更快地恢复到最新状态。但是,standby master不处理来自其他Alluxio组件的任何请求。当leading master出现故障后,standby master会重新选举新的leading master。 + +### Secondary master(用于UFS 日志) + +当使用UFS日志运行非HA模式的单个Alluxio master时,可以在与leading master相同的服务器上启动secondary master来编写日志检查点。请注意,secondary master并非用于提供高可用,而是为了减轻leading master的负载,使其能够快速恢复到最新状态。与standby master不同的是,secondary master永远无法升级为leading master。 + +### Job Masters + +Alluxio Job Master是负责在Alluxio中异步调度大量较为重量级文件系统操作的独立进程。通过将执行这些操作的需求从在同一进程中的leading Alluxio Master上独立出去,Leading Alluxio Master会消耗更少的资源,并且能够在更短的时间内服务更多的client。此外,这也为将来增加更复杂的操作提供了可扩展的框架。 + +Alluxio Job Master接受执行上述操作的请求,并在将要具体执行操作的(作为Alluxio 文件系统client的)**Alluxio Job Workers**间进行任务调度。下一节将对job worker作更多的介绍。 + +## Workers + +

+Alluxio workers +

+ +### Alluxio Workers + +Alluxio worker负责管理分配给Alluxio的用户可配置本地资源(RAMDisk、SSD、HDD 等)。Alluxio worker以block为单位存储数据,并通过在其本地资源中读取或创建新的block来服务client读取或写入数据的请求。worker只负责管理block, 从文件到block的实际映射都由master管理。 + +Worker在底层存储上执行数据操作, 由此带来两大重要的益处: + +* 从底层存储读取的数据可以存储在worker中,并可以立即提供给其他client使用。 +* client可以是轻量级的,不依赖于底层存储连接器。 + +由于RAM通常提供的存储容量有限,因此在Alluxio worker空间已满的情况下,需要释放worker中的block。Worker可通过配置释放策略(eviction policies)来决定在Alluxio空间中将保留哪些数据。有关此主题的更多介绍,请查看[分层存储]({{ '/cn/core-services/Caching.html' | relativize_url }}#multiple-tier-storage)文档。 + +### Alluxio Job Workers + +Alluxio Job Workers是Alluxio文件系统的client, 负责运行Alluxio Job Master分配的任务。Job Worker接收在指定的文件系统位置上运行负载、持久化、复制、移动或拷贝操作的指令。 + +Alluxio job worker无需与普通worker并置,但建议将两者部署在同一个物理节点上。 + +## Client + +Alluxio client为用户提供了一个可与Alluxio server交互的网关。client发起与leading master节点的通信,来执行元数据操作,并从worker读取和写入存储在Alluxio中的数据。Alluxio支持Java中的原生文件系统API,并支持包括REST、Go和Python在内的多种客户端语言。此外,Alluxio支持与HDFS API以及Amazon S3 API兼容的API。 + +注意,Alluxio client不直接访问底层存储,而是通过Alluxio worker传输数据。 + +## 数据流:读操作 + +本节和下一节将介绍在典型配置下的Alluxio在常规读和写场景的行为:Alluxio与计算框架和应用程序并置部署,而底层存储一般是远端存储集群或云存储。 + +Alluxio位于存储和计算框架之间,可以作为数据读取的缓存层。本小节介绍Alluxio的不同缓存场景及其对性能的影响。 + +### 本地缓存命中 + +当应用程序需要读取的数据已经被缓存在本地Alluxio worker上时,即为本地缓存命中。应用程序通过Alluxio client请求数据访问后,Alluxio client会向 Alluxio master检索储存该数据的Alluxio worker位置。如果本地Alluxio worker存有该数据,Alluxio client将使用"短路读"绕过Alluxio worker,直接通过本地文件系统读取文件。短路读可避免通过TCP socket传输数据,并能提供内存级别的数据访问速度。短路读是从Alluxio读取数据最快的方式。 + +在默认情况下,短路读需要获得相应的本地文件系统操作权限。当Alluxio worker和client是在容器化的环境中运行时,可能会由于不正确的资源信息统计而无法实现短路读。在基于文件系统的短路读不可行的情况下,Alluxio可以基于domain socket的方式实现短路读,这时,Alluxio worker将通过预先指定的domain socket路径将数据传输到client。有关该主题的更多信息,请参见[在Docker上运行Alluxio]({{ '/cn/deploy/Running-Alluxio-On-Docker.html' | relativize_url }})的文档。 + +此外,除内存外,Alluxio还可以管理其他存储介质(如SSD、HDD),因此本地访问速度可能因本地存储介质而异。要了解有关此主题的更多信息,请参见[分层存储文档]({{ '/cn/core-services/Caching.html' | relativize_url }}#multiple-tier-storage)。 + +

+Data Flow of Read from a Local Worker +

+ +### 远程缓存命中 + +当Alluxio client请求的数据不在本地Alluxio worker上,但在集群中的某个远端Alluxio worker上时,Alluxio client将从远端worker读取数据。当client完成数据读取后,会指示本地worker(如果存在的话),在本地写入一个副本,以便将来再有相同数据的访问请求时,可以从本地内存中读取。远程缓存命中情况下的数据读取速度可以达到本地网络传输速度。由于Alluxio worker之间的网络速度通常比Alluxio worker与底层存储之间的速度快,因此Alluxio会优先从远端worker存储中读取数据。 + +

+Data Flow of Read from a Remote Worker +

+ +### 缓存未命中 + +如果请求的数据不在Alluxio空间中,即发生请求未命中缓存的情况,应用程序将必须从底层存储中读取数据。Alluxio client会将读取请求委托给一个Alluxio worker(优先选择本地worker),从底层存储中读取和缓存数据。缓存未命中时,由于应用程序必须从底层存储系统中读取数据,因此一般会导致较大的延迟。缓存未命中通常发生在第一次读取数据时。 + +当Alluxio client仅读取block的一部分或者非顺序读取时,Alluxio client将指示Alluxio worker异步缓存整个block。这也称为异步缓存。异步缓存不会阻塞client,但如果Alluxio和存储系统之间的网络带宽成为瓶颈,则仍然可能会影响性能。我们可以通过设置 `alluxio.worker.network.async.cache.manager.threads.max`来调节异步缓存的影响。默认值为 `8`。 + +

+Cache Miss data flow +

+ +### 绕过缓存 + +用户可以通过将client中的配置项[`alluxio.user.file.readtype.default`]({{ '/cn/reference/Properties-List.html' | relativize_url }}#alluxio.user.file.readtype.default)设置为`NO_CACHE`来关闭Alluxio中的缓存功能。 + +## 数据流:写操作 + +用户可以通过选择不同的写入类型来配置不同的写入方式。用户可以通过Alluxio API或在client中配置[`alluxio.user.file.writetype.default`]({{ '/cn/reference/Properties-List.html' | relativize_url }}#alluxio.user.file.writetype.default) +来设置写入类型。本节将介绍不同写入类型的行为以及其对应用程序的性能影响。 + +### 仅写Alluxio缓存(`MUST_CACHE`) + +如果使用写入类型MUST_CACHE,Alluxio client仅将数据写入本地Alluxio worker,不会将数据写入底层存储系统。在写入期间,如果"短路写"可用,Alluxio client将直接将数据写入本地RAM盘上的文件中,绕过Alluxio worker,从而避免网络传输。由于数据没有持久化地写入底层存储,如果机器崩溃或需要通过释放缓存数据来进行较新的写入,则数据可能会丢失。因此,只有当可以容忍数据丢失的场景时(如写入临时数据),才考虑使用`MUST_CACHE` 类型的写入。 + +

+MUST_CACHE data flow +

+ +### 同步写缓存与持久化存储 (`CACHE_THROUGH`) + +如果使用`CACHE_THROUGH的`写入类型,数据将被同步写入Alluxio worker和底层存储系统。Alluxio client将写入委托给本地worker,而worker将同时写入本地内存和底层存储。由于写入底层存储的速度通常比写入本地存储慢得多,因此client写入速度将与底层存储的写入速度相当。当需要保证数据持久性时,建议使用 `CACHE_THROUGH` 写入类型。该类型还会写入本地副本,本地存储的数据可供将来(读取)使用。 + +

+CACHE_THROUGH data flow +

+ +### 异步写回持久化存储(`ASYNC_THROUGH`) + +Alluxio还提供了一种`ASYNC_THROUGH`写入类型。如果使用`ASYNC_THROUGH`,数据将被先同步写入Alluxio worker,再在后台持久化写入底层存储系统。 `ASYNC_THROUGH`可以以接近`MUST_CACHE`的内存速度提供数据写入,并能够完成数据持久化。从Alluxio 2.0开始,`ASYNC_THROUGH`已成为默认写入类型。 + +

+ASYNC_THROUGH data flow +

+ +为了提供容错能力,还有一个重要配置项`alluxio.user.file.replication.durable`会和`ASYNC_THROUGH`一起使用。该配置项设置了在数据写入完成后但未被持久化到底层存储之前新数据在Alluxio中的目标复制级别,默认值为1。Alluxio将在后台持久化过程完成之前维持文件的目标复制级别,并在持久化完成之后收回Alluxio中的空间,因此数据只会被写入UFS一次。 + +如果使用`ASYNC_THROUGH`写入副本,并且在持久化数据之前出现包含副本的所有worker都崩溃的情况,则会导致数据丢失。 + +### 仅写持久化存储(`THROUGH`) + +如果使用`THROUGH`,数据会同步写入底层存储,而不缓存到Alluxio worker。这种写入类型确保写入完成后数据将被持久化,但写入速度会受限于底层存储吞吐量。 + +### 数据一致性 + +无论写入类型如何,这些写入操作都会首先经由Alluxio master并在修改Alluxio文件系统之后再向client或应用程序返回成功,所以**Alluxio 空间**中的文件/目录始终是高度一致的。因此,只要相应的写入操作成功完成,不同的Alluxio client看到的数据将始终是最新的。 + +但是,对于需考虑UFS中数据状态的用户或应用程序而言,不同写入类型可能会导致差异: + +- `MUST_CACHE` 不向UFS写入数据,因此Alluxio空间的数据永远不会与UFS一致。 +- `CACHE_THROUGH` 在向应用程序返回成功之前将数据同步写入Alluxio和UFS。 + - 如果写入UFS也是强一致的(例如,HDFS),且UFS中没有其他未经由Alluxio的更新,则Alluxio空间的数据将始终与UFS保持一致; + - 如果写入UFS是最终一致的(例如S3),则文件可能已成功写入Alluxio,但会稍晚才显示在UFS中。在这种情况下,由于Alluxio client总是会咨询强一致的Alluxio master,因此Alluxio client仍然会看到一致的文件系统;因此,尽管不同的Alluxio client始终在Alluxio空间中看到数据一致的状态,但在数据最终传输到UFS之前可能会存在数据不一致的阶段。 +- `ASYNC_THROUGH` 将数据写入Alluxio并返回给应用程序,而Alluxio会将数据异步传输到UFS。从用户的角度来看,该文件可以成功写入Alluxio,但稍晚才会持久化到UFS中。 +- `THROUGH` 直接将数据写入UFS,不在Alluxio中缓存数据,但是,Alluxio知道文件的存在及其状态。因此元数据仍然是一致的。 From db9f07a50e1f7f6c45d511d591c8775ada6b71bb Mon Sep 17 00:00:00 2001 From: jja725 Date: Wed, 11 Jan 2023 14:03:59 -0800 Subject: [PATCH 056/334] Add new distributed load ### What changes are proposed in this pull request? Add new load for better performance ### Why are the changes needed? na ### Does this PR introduce any user facing changes? 
na pr-link: Alluxio/alluxio#16747 change-id: cid-9c2b2879aca2d264231236d1d780537351e15767 --- .../alluxio/client/file/BaseFileSystem.java | 26 + .../client/file/DelegatingFileSystem.java | 17 + .../java/alluxio/client/file/FileSystem.java | 28 + .../client/file/FileSystemMasterClient.java | 28 + .../RetryHandlingFileSystemMasterClient.java | 43 ++ .../client/file/ufs/UfsBaseFileSystem.java | 17 + .../file/MockFileSystemMasterClient.java | 17 + .../cache/LocalCacheFileInStreamTest.java | 17 + .../main/java/alluxio/conf/PropertyKey.java | 19 + .../main/java/alluxio/metrics/MetricKey.java | 33 +- .../journal/JournalEntryAssociation.java | 3 +- .../journal/checkpoint/CheckpointName.java | 1 + .../journal/JournalEntryAssociationTest.java | 5 +- .../master/file/DefaultFileSystemMaster.java | 15 +- .../FileSystemMasterClientServiceHandler.java | 53 +- .../master/file/loadmanager/LoadJob.java | 725 ++++++++++++++++++ .../master/file/loadmanager/LoadJobState.java | 71 ++ .../master/file/loadmanager/LoadManager.java | 572 ++++++++++++++ .../master/file/loadmanager/LoadJobTest.java | 142 ++++ .../file/loadmanager/LoadManagerTest.java | 430 +++++++++++ .../file/loadmanager/LoadTestUtils.java | 118 +++ .../alluxio/worker/block/UfsIOManager.java | 4 + .../src/main/proto/grpc/block_worker.proto | 1 + .../main/proto/grpc/file_system_master.proto | 51 ++ core/transport/src/main/proto/proto.lock | 234 ++++++ .../src/main/proto/proto/journal/job.proto | 23 + .../main/proto/proto/journal/journal.proto | 4 +- .../fuse/auth/AbstractAuthPolicyTest.java | 17 + .../cli/MockFuseFileSystemMasterClient.java | 17 + .../fsmaster/FileSystemMasterBase.java | 3 +- .../alluxio/cli/fs/command/LoadCommand.java | 280 +++++-- .../command/LoadCommandIntegrationTest.java | 187 +++-- .../OldLoadCommandIntegrationTest.java | 100 +++ 33 files changed, 3183 insertions(+), 118 deletions(-) create mode 100644 core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java create mode 100644 
core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java create mode 100644 core/transport/src/main/proto/proto/journal/job.proto create mode 100644 tests/src/test/java/alluxio/client/cli/fs/command/OldLoadCommandIntegrationTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index 6babc837edc0..4d9602b3a556 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -516,6 +516,32 @@ public void needsSync(AlluxioURI path) }); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().submitLoad(path, bandwidth, usePartialListing, verify); + } + } + + @Override + public boolean stopLoad(AlluxioURI path) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().stopLoad(path); + } + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().getLoadProgress(path, format, verbose); + } + } + /** * Checks an {@link AlluxioURI} for scheme and authority information. Warn the user and throw an * exception if necessary. 
diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index d0b5e1801721..d4030a674d9c 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -241,6 +241,23 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { mDelegatedFileSystem.needsSync(path); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + return mDelegatedFileSystem.submitLoad(path, bandwidth, usePartialListing, verify); + } + + @Override + public boolean stopLoad(AlluxioURI path) { + return mDelegatedFileSystem.stopLoad(path); + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + return mDelegatedFileSystem.getLoadProgress(path, format, verbose); + } + @Override public void close() throws IOException { mDelegatedFileSystem.close(); diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index cbf6a9534c01..eb311ca57bc8 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -737,4 +737,32 @@ default void unmount(AlluxioURI path) throws IOException, AlluxioException { * @param path the path needing synchronization */ void needsSync(AlluxioURI path) throws IOException, AlluxioException; + + /** + * Submit a load job. 
+ * @param path alluxio path to be loaded + * @param bandwidth bandwidth allocated to this load, unlimited if empty + * @param usePartialListing whether to use partial listing + * @param verify whether to verify after load finishes + * @return true if job is submitted, false if a load of the same path already exists + */ + boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify); + + /** + * Stop a load job. + * @param path alluxio path to be stopped + * @return true if job is stopped, false if cannot find job + */ + boolean stopLoad(AlluxioURI path); + + /** + * Get progress of a load job. + * @param path alluxio path to get progress + * @param format progress report format + * @param verbose whether to return verbose report + * @return the load job progress + */ + String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java index 3d643329d21f..dff5d49cfe61 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java @@ -342,4 +342,32 @@ void updateUfsMode(AlluxioURI ufsUri, UpdateUfsModePOptions options) * @param path the path to invalidate */ void needsSync(AlluxioURI path) throws AlluxioStatusException; + + /** + * Submit a directory load job. 
+ * @param path alluxio path to be loaded + * @param bandwidth bandwidth allocated to this load, unlimited if empty + * @param usePartialListing whether to use partial listing + * @param verify whether to verify after load finishes + * @return true if job is submitted, false if a load of the same path already exists + */ + boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify); + + /** + * Stop a directory load. + * @param path alluxio path to be stopped + * @return true if job is stopped, false if cannot find job + */ + boolean stopLoad(AlluxioURI path); + + /** + * Get progress of a load job. + * @param path alluxio path to get progress + * @param format progress report format + * @param verbose whether to return verbose report + * @return the load job progress + */ + String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java index ecd6f6962884..64f10ed3d40c 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java @@ -418,6 +418,49 @@ public void needsSync(AlluxioURI path) throws AlluxioStatusException { RPC_LOG, "NeedsSync", "path=%s", path); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + alluxio.grpc.LoadPathPOptions.Builder options = alluxio.grpc.LoadPathPOptions + .newBuilder().setPartialListing(usePartialListing).setVerify(verify); + if (bandwidth.isPresent()) { + options.setBandwidth(bandwidth.getAsLong()); + } + connectWithRuntimeException(); + alluxio.grpc.LoadPathPResponse response = mClient.loadPath( + 
alluxio.grpc.LoadPathPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options.build()) + .build()); + return response.getNewLoadSubmitted(); + } + + @Override + public boolean stopLoad(AlluxioURI path) { + connectWithRuntimeException(); + alluxio.grpc.StopLoadPathPResponse response = mClient.stopLoadPath( + alluxio.grpc.StopLoadPathPRequest.newBuilder() + .setPath(path.getPath()) + .build()); + return response.getExistingLoadStopped(); + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + alluxio.grpc.LoadProgressPOptions.Builder options = + alluxio.grpc.LoadProgressPOptions.newBuilder() + .setVerbose(verbose); + format.map(options::setFormat); + connectWithRuntimeException(); + alluxio.grpc.GetLoadProgressPResponse response = mClient.getLoadProgress( + alluxio.grpc.GetLoadProgressPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options.build()) + .build()); + return response.getProgressReport(); + } + /** * Gets the path that will be transported to master. 
* diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 5bc1e5ec8821..9f1820428a68 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -392,6 +392,23 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { throw new UnsupportedOperationException(); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean stopLoad(AlluxioURI path) { + throw new UnsupportedOperationException(); + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + throw new UnsupportedOperationException(); + } + /** * Transform UFS file/directory status to client-side status. 
* diff --git a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java index 149a1bcd4619..3272f5b63106 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java +++ b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java @@ -234,4 +234,21 @@ public void close() throws IOException { @Override public void needsSync(AlluxioURI path) throws AlluxioStatusException { } + + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + return false; + } + + @Override + public boolean stopLoad(AlluxioURI path) { + return false; + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + return null; + } } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index 570f5110c50f..bebf0af52b81 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -862,6 +862,23 @@ public void needsSync(AlluxioURI path) { throw new UnsupportedOperationException(); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean stopLoad(AlluxioURI path) { + throw new UnsupportedOperationException(); + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + throw new UnsupportedOperationException(); + } + @Override public void close() throws IOException { throw new UnsupportedOperationException(); diff --git 
a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index de45b2bfde9a..c050dcd5e961 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -6894,6 +6894,21 @@ public String toString() { .setIsHidden(true) .build(); + // new job service + public static final PropertyKey JOB_BATCH_SIZE = + intBuilder(Name.JOB_BATCH_SIZE) + .setDescription("The number of tasks would be included in a job request.") + .setDefaultValue(20) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey JOB_RETENTION_TIME = + durationBuilder(Name.JOB_RETENTION_TIME) + .setDescription("The length of time the Alluxio should save information about " + + "completed jobs before they are discarded.") + .setDefaultValue("1d") + .setScope(Scope.MASTER) + .build(); + // // Job service // @@ -8633,6 +8648,10 @@ public static final class Name { "alluxio.network.tls.ssl.context.provider.classname"; public static final String NETWORK_TLS_ENABLED = "alluxio.network.tls.enabled"; + // new job service + public static final String JOB_BATCH_SIZE = "alluxio.job.batch.size"; + public static final String JOB_RETENTION_TIME = "alluxio.job.retention.time"; + // // Job service // diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index c75723028cef..6297193d426f 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -983,7 +983,38 @@ public static String getSyncMetricName(long mountId) { .setDescription("The number of running status job") .setMetricType(MetricType.COUNTER) .build(); - +// new job metrics + public static final MetricKey MASTER_JOB_LOAD_SUCCESS = + new Builder("Master.JobLoadSuccess") + .setDescription("The number of successful Load commands") + .setMetricType(MetricType.COUNTER) + .build(); 
+ public static final MetricKey MASTER_JOB_LOAD_FAIL = + new Builder("Master.JobLoadFail") + .setDescription("The number of failed Load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_COUNT = + new Builder("Master.JobLoadBlockCount") + .setDescription("The number of blocks loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_FAIL = + new Builder("Master.JobLoadBlockFail") + .setDescription("The number of blocks failed to be loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_BLOCK_SIZE = + new Builder("Master.JobDistributedLoadBlockSizes") + .setDescription("The total block size loaded by load commands") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey MASTER_JOB_LOAD_RATE = + new Builder("Master.JobLoadRate") + .setDescription("The average loading rate of Load commands") + .setMetricType(MetricType.METER) + .setIsClusterAggregated(true) + .build(); // Distributed command related metrics public static final MetricKey MASTER_JOB_DISTRIBUTED_LOAD_SUCCESS = new Builder("Master.JobDistributedLoadSuccess") diff --git a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java index 3cdfefa784f8..2e650cb0d3b8 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java +++ b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java @@ -44,7 +44,8 @@ public static String getMasterForEntry(JournalEntry entry) { || entry.hasUpdateUfsMode() || entry.hasUpdateInode() || entry.hasUpdateInodeDirectory() - || entry.hasUpdateInodeFile()) { + || entry.hasUpdateInodeFile() + || entry.hasLoadJob()) { return Constants.FILE_SYSTEM_MASTER_NAME; } if 
(entry.hasBlockContainerIdGenerator() diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java index 490768dd6648..9ce742199271 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java @@ -39,4 +39,5 @@ public enum CheckpointName { TABLE_MASTER_TRANSFORM_MANAGER, TO_BE_PERSISTED_FILE_IDS, TTL_BUCKET_LIST, + LOAD_MANAGER, } diff --git a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java index 83e9cfaf6e1c..cff02dd0e428 100644 --- a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java +++ b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java @@ -98,7 +98,10 @@ public class JournalEntryAssociationTest { JournalEntry.newBuilder().setUpdateInodeFile(UpdateInodeFileEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setAddTransformJobInfo(Table.AddTransformJobInfoEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setRemoveTransformJobInfo(Table.RemoveTransformJobInfoEntry.getDefaultInstance()).build(), - JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build() + JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build(), + JournalEntry.newBuilder().setLoadJob(alluxio.proto.journal.Job.LoadJobEntry.newBuilder() + .setLoadPath("/test").setState(alluxio.proto.journal.Job.PJobState.CREATED) + .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("1").build()).build() ); // CHECKSTYLE.OFF: LineLengthExceed diff --git 
a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 7cb55db0af8b..7fa2dfe61572 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -405,6 +405,7 @@ public class DefaultFileSystemMaster extends CoreMaster /** Used to check pending/running backup from RPCs. */ private final CallTracker mStateLockCallTracker; + private final alluxio.master.file.loadmanager.LoadManager mLoadManager; final Clock mClock; @@ -508,6 +509,7 @@ public Type getType() { mSyncPrefetchExecutor.allowCoreThreadTimeOut(true); mSyncMetadataExecutor.allowCoreThreadTimeOut(true); mActiveSyncMetadataExecutor.allowCoreThreadTimeOut(true); + mLoadManager = new alluxio.master.file.loadmanager.LoadManager(this); // The mount table should come after the inode tree because restoring the mount table requires // that the inode tree is already restored. 
@@ -518,6 +520,7 @@ public Type getType() { add(mMountTable); add(mUfsManager); add(mSyncManager); + add(mLoadManager); } }; mJournaledGroup = new JournaledGroup(journaledComponents, CheckpointName.FILE_SYSTEM_MASTER); @@ -561,7 +564,7 @@ private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors - .intercept(new FileSystemMasterClientServiceHandler(this), + .intercept(new FileSystemMasterClientServiceHandler(this, mLoadManager), new ClientContextServerInjector()))); services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, new GrpcService(ServerInterceptors .intercept(new FileSystemMasterJobServiceHandler(this), @@ -749,6 +752,7 @@ public void start(Boolean isPrimary) throws IOException { } mAccessTimeUpdater.start(); mSyncManager.start(); + mLoadManager.start(); } } @@ -761,6 +765,7 @@ public void stop() throws IOException { } mSyncManager.stop(); mAccessTimeUpdater.stop(); + mLoadManager.stop(); super.stop(); } @@ -5336,4 +5341,12 @@ public MountTable getMountTable() { public void needsSync(AlluxioURI path) throws InvalidPathException { getSyncPathCache().notifyInvalidation(path); } + + /** + * Get load manager. 
+ * @return load manager + */ + public alluxio.master.file.loadmanager.LoadManager getLoadManager() { + return mLoadManager; + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index 493c27869bac..adb6b3fe1229 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -117,15 +117,19 @@ public final class FileSystemMasterClientServiceHandler private static final Logger LOG = LoggerFactory.getLogger(FileSystemMasterClientServiceHandler.class); private final FileSystemMaster mFileSystemMaster; + private final alluxio.master.file.loadmanager.LoadManager mLoadManager; /** * Creates a new instance of {@link FileSystemMasterClientServiceHandler}. * * @param fileSystemMaster the {@link FileSystemMaster} the handler uses internally + * @param loadManager the {@link alluxio.master.file.loadmanager.LoadManager} */ - public FileSystemMasterClientServiceHandler(FileSystemMaster fileSystemMaster) { + public FileSystemMasterClientServiceHandler(FileSystemMaster fileSystemMaster, + alluxio.master.file.loadmanager.LoadManager loadManager) { Preconditions.checkNotNull(fileSystemMaster, "fileSystemMaster"); mFileSystemMaster = fileSystemMaster; + mLoadManager = Preconditions.checkNotNull(loadManager, "loadManager"); } @Override @@ -483,6 +487,53 @@ public void needsSync(NeedsSyncRequest request, }, "NeedsSync", true, "request=%s", responseObserver, request); } + @Override + public void loadPath(alluxio.grpc.LoadPathPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + boolean submitted = mLoadManager.submitLoad( + request.getPath(), + request.getOptions().hasBandwidth() + ? 
java.util.OptionalLong.of(request.getOptions().getBandwidth()) + : java.util.OptionalLong.empty(), + request.getOptions().hasPartialListing() && request.getOptions().getPartialListing(), + request.getOptions().hasVerify() && request.getOptions().getVerify()); + return alluxio.grpc.LoadPathPResponse.newBuilder() + .setNewLoadSubmitted(submitted) + .build(); + }, "LoadPath", "request=%s", responseObserver, request); + } + + @Override + public void stopLoadPath(alluxio.grpc.StopLoadPathPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + boolean stopped = mLoadManager.stopLoad(request.getPath()); + return alluxio.grpc.StopLoadPathPResponse.newBuilder() + .setExistingLoadStopped(stopped) + .build(); + }, "stopLoadPath", "request=%s", responseObserver, request); + } + + @Override + public void getLoadProgress(alluxio.grpc.GetLoadProgressPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + alluxio.grpc.LoadProgressReportFormat format = alluxio.grpc.LoadProgressReportFormat.TEXT; + if (request.hasOptions() && request.getOptions().hasFormat()) { + format = request.getOptions().getFormat(); + } + boolean verbose = false; + if (request.hasOptions() && request.getOptions().hasVerbose()) { + verbose = request.getOptions().getVerbose(); + } + return alluxio.grpc.GetLoadProgressPResponse.newBuilder() + .setProgressReport(mLoadManager.getLoadProgress( + request.getPath(), format, verbose)) + .build(); + }, "getLoadProgress", "request=%s", responseObserver, request); + } + /** * Helper to return {@link AlluxioURI} from transport URI. 
* diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java new file mode 100644 index 000000000000..94d94e573fb9 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java @@ -0,0 +1,725 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.loadmanager; + +import static java.util.Objects.requireNonNull; + +import alluxio.AlluxioURI; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.grpc.Block; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadProgressReportFormat; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.proto.journal.Job; +import alluxio.proto.journal.Journal; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.util.FormatUtils; +import alluxio.wire.BlockInfo; 
+import alluxio.wire.FileBlockInfo; +import alluxio.wire.FileInfo; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * This class should only be manipulated from the scheduler thread in LoadManager + * thus the state changing functions are not thread safe. 
+ */ +@NotThreadSafe +public class LoadJob { + private static final Logger LOG = LoggerFactory.getLogger(LoadJob.class); + private static final double FAILURE_RATIO_THRESHOLD = 0.05; + private static final int FAILURE_COUNT_THRESHOLD = 100; + private static final int RETRY_BLOCK_CAPACITY = 1000; + private static final double RETRY_THRESHOLD = 0.8 * RETRY_BLOCK_CAPACITY; + private static final int BATCH_SIZE = Configuration.getInt(PropertyKey.JOB_BATCH_SIZE); + // Job configurations + private final String mPath; + private final Optional mUser; + private OptionalLong mBandwidth; + private boolean mUsePartialListing; + private boolean mVerificationEnabled; + + // Job states + private final LinkedList mRetryBlocks = new LinkedList<>(); + private final Map mFailedFiles = new HashMap<>(); + private final long mStartTime; + private final AtomicLong mProcessedFileCount = new AtomicLong(); + private final AtomicLong mTotalFileCount = new AtomicLong(); + private final AtomicLong mLoadedByteCount = new AtomicLong(); + + private final AtomicLong mTotalByteCount = new AtomicLong(); + private final AtomicLong mTotalBlockCount = new AtomicLong(); + private final AtomicLong mCurrentBlockCount = new AtomicLong(); + private final AtomicLong mTotalFailureCount = new AtomicLong(); + private final AtomicLong mCurrentFailureCount = new AtomicLong(); + private final String mJobId; + private LoadJobState mState; + private Optional mFailedReason = Optional.empty(); + private Optional mFileIterator = Optional.empty(); + private FileInfo mCurrentFile; + private Iterator mBlockIterator = Collections.emptyIterator(); + private OptionalLong mEndTime = OptionalLong.empty(); + + /** + * Constructor. 
+ * @param path file path + * @param user user for authentication + * @param bandwidth bandwidth + */ + @VisibleForTesting + public LoadJob(String path, String user, OptionalLong bandwidth) { + this(path, Optional.of(user), UUID.randomUUID().toString(), bandwidth, false, false); + } + + /** + * Constructor. + * + * @param path file path + * @param user user for authentication + * @param jobId job identifier + * @param bandwidth bandwidth + * @param usePartialListing whether to use partial listing + * @param verificationEnabled whether to verify the job after loaded + */ + public LoadJob( + String path, + Optional user, String jobId, OptionalLong bandwidth, + boolean usePartialListing, + boolean verificationEnabled) { + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mJobId = requireNonNull(jobId, "jobId is null"); + Preconditions.checkArgument( + !bandwidth.isPresent() || bandwidth.getAsLong() > 0, + String.format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); + mBandwidth = bandwidth; + mUsePartialListing = usePartialListing; + mVerificationEnabled = verificationEnabled; + mStartTime = System.currentTimeMillis(); + mState = LoadJobState.LOADING; + } + + /** + * Get load file path. + * @return file path + */ + public String getPath() { + return mPath; + } + + /** + * Get user. + * @return user + */ + public Optional getUser() { + return mUser; + } + + /** + * Get end time. + * @return end time + */ + public OptionalLong getEndTime() { + return mEndTime; + } + + /** + * Get bandwidth. + * @return the allocated bandwidth + */ + public OptionalLong getBandwidth() { + return mBandwidth; + } + + /** + * Update end time. + * @param time time in ms + */ + public void setEndTime(long time) { + mEndTime = OptionalLong.of(time); + } + + /** + * Update bandwidth. 
+ * @param bandwidth new bandwidth + */ + public void updateBandwidth(OptionalLong bandwidth) { + mBandwidth = bandwidth; + } + + /** + * Is verification enabled. + * @return whether verification is enabled + */ + public boolean isVerificationEnabled() { + return mVerificationEnabled; + } + + /** + * Enable verification. + * @param enableVerification whether to enable verification + */ + public void setVerificationEnabled(boolean enableVerification) { + mVerificationEnabled = enableVerification; + } + + /** + * Get load status. + * @return the load job's status + */ + public LoadJobState getJobState() { + return mState; + } + + /** + * Set load state. + * @param state new state + */ + public void setJobState(LoadJobState state) { + LOG.debug("Change JobState to {} for job {}", state, this); + mState = state; + if (!isRunning()) { + mEndTime = OptionalLong.of(System.currentTimeMillis()); + } + } + + /** + * Get uniq tag. + * @return the tag + */ + public String getJobId() { + return mJobId; + } + + /** + * Set load state to FAILED with given reason. + * @param reason failure exception + */ + public void failJob(AlluxioRuntimeException reason) { + setJobState(LoadJobState.FAILED); + mFailedReason = Optional.of(reason); + LoadManager.JOB_LOAD_FAIL.inc(); + } + + /** + * Get batch size. + * @return batch size + */ + public int getBatchSize() { + return BATCH_SIZE; + } + + /** + * Add bytes to total loaded bytes. + * @param bytes bytes to be added to total + */ + public void addLoadedBytes(long bytes) { + mLoadedByteCount.addAndGet(bytes); + } + + /** + * Get load job progress. + * @param format report format + * @param verbose whether to include error details in the report + * @return the load progress report + */ + public String getProgress(LoadProgressReportFormat format, boolean verbose) { + return (new LoadProgressReport(this, verbose)).getReport(format); + } + + /** + * Get the processed block count in the current loading pass. 
+ * @return current block count + */ + public long getCurrentBlockCount() { + return mCurrentBlockCount.get(); + } + + /** + * Get the total processed block count for this job. + * @return total block count + */ + public long getTotalBlockCount() { + return mTotalBlockCount.get(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LoadJob that = (LoadJob) o; + return Objects.equal(mPath, that.mPath); + } + + @Override + public int hashCode() { + return Objects.hashCode(mPath); + } + + /** + * Check whether the load job is healthy. + * @return true if the load job is healthy, false if not + */ + public boolean isHealthy() { + long currentFailureCount = mCurrentFailureCount.get(); + return mState != LoadJobState.FAILED + && currentFailureCount <= FAILURE_COUNT_THRESHOLD + || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; + } + + /** + * Check whether the load job is still running. + * @return true if the load job is running, false if not + */ + public boolean isRunning() { + return mState == LoadJobState.LOADING || mState == LoadJobState.VERIFYING; + } + + /** + * Check whether the load job is finished. + * @return true if the load job is finished, false if not + */ + public boolean isDone() { + return mState == LoadJobState.SUCCEEDED || mState == LoadJobState.FAILED; + } + + /** + * Check whether the current loading pass is finished. + * @return true if the load job is finished, false if not + */ + public boolean isCurrentLoadDone() { + return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() + && mRetryBlocks.isEmpty(); + } + + /** + * Initiate a verification pass. This will re-list the directory and find + * any unloaded files / blocks and try to load them again. 
+ */ + public void initiateVerification() { + Preconditions.checkState(isCurrentLoadDone(), "Previous pass is not finished"); + mFileIterator = Optional.empty(); + mTotalBlockCount.addAndGet(mCurrentBlockCount.get()); + mTotalFailureCount.addAndGet(mCurrentFailureCount.get()); + mCurrentBlockCount.set(0); + mCurrentFailureCount.set(0); + mState = LoadJobState.VERIFYING; + } + + /** + * Get next batch of blocks. + * @param fileSystemMaster file system master to fetch file infos + * @param count number of blocks + * @return list of blocks + */ + public List getNextBatch(FileSystemMaster fileSystemMaster, int count) { + if (!mFileIterator.isPresent()) { + mFileIterator = + Optional.of(new FileIterator(fileSystemMaster, mPath, mUser, mUsePartialListing)); + if (!mFileIterator.get().hasNext()) { + return ImmutableList.of(); + } + mCurrentFile = mFileIterator.get().next(); + mProcessedFileCount.incrementAndGet(); + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + + ImmutableList.Builder batchBuilder = ImmutableList.builder(); + int i = 0; + // retry failed blocks if there's too many failed blocks otherwise wait until no more new block + if (mRetryBlocks.size() > RETRY_THRESHOLD + || (!mFileIterator.get().hasNext() && !mBlockIterator.hasNext())) { + while (i < count && !mRetryBlocks.isEmpty()) { + batchBuilder.add(requireNonNull(mRetryBlocks.removeFirst())); + i++; + } + } + for (; i < count; i++) { + if (!mBlockIterator.hasNext()) { + if (!mFileIterator.get().hasNext()) { + return batchBuilder.build(); + } + mCurrentFile = mFileIterator.get().next(); + mProcessedFileCount.incrementAndGet(); + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + long blockId = mBlockIterator.next(); + BlockInfo blockInfo = mCurrentFile.getFileBlockInfo(blockId).getBlockInfo(); + if (blockInfo.getLocations().isEmpty()) { + batchBuilder.add(buildBlock(mCurrentFile, blockId)); + mCurrentBlockCount.incrementAndGet(); + } + } + return batchBuilder.build(); + } + + 
/** + * Add a block to retry later. + * @param block the block that failed to load thus needing retry + * @return whether the block is successfully added + */ + public boolean addBlockToRetry(Block block) { + if (mRetryBlocks.size() >= RETRY_BLOCK_CAPACITY) { + return false; + } + LOG.debug("Retry block {}", block); + mRetryBlocks.add(block); + mCurrentFailureCount.incrementAndGet(); + LoadManager.JOB_LOAD_BLOCK_FAIL.inc(); + return true; + } + + /** + * Add a block to failure summary. + * + * @param block the block that failed to load and cannot be retried + * @param message failure message + * @param code status code for exception + */ + public void addBlockFailure(Block block, String message, int code) { + // When multiple blocks of the same file failed to load, from user's perspective, + // it's not hugely important what are the reasons for each specific failure, + // if they are different, so we will just keep the first one. + mFailedFiles.put(block.getUfsPath(), + String.format("Status code: %s, message: %s", code, message)); + mCurrentFailureCount.incrementAndGet(); + LoadManager.JOB_LOAD_BLOCK_FAIL.inc(); + } + + private static Block buildBlock(FileInfo fileInfo, long blockId) { + return Block.newBuilder().setBlockId(blockId) + .setLength(fileInfo.getFileBlockInfo(blockId).getBlockInfo().getLength()) + .setUfsPath(fileInfo.getUfsPath()) + .setMountId(fileInfo.getMountId()) + .setOffsetInFile(fileInfo.getFileBlockInfo(blockId).getOffset()) + .build(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("Path", mPath) + .add("User", mUser) + .add("Bandwidth", mBandwidth) + .add("UsePartialListing", mUsePartialListing) + .add("VerificationEnabled", mVerificationEnabled) + .add("RetryBlocks", mRetryBlocks) + .add("FailedFiles", mFailedFiles) + .add("StartTime", mStartTime) + .add("ProcessedFileCount", mProcessedFileCount) + .add("LoadedByteCount", mLoadedByteCount) + .add("TotalBlockCount", mTotalBlockCount) + 
.add("CurrentBlockCount", mCurrentBlockCount) + .add("TotalFailureCount", mTotalFailureCount) + .add("CurrentFailureCount", mCurrentFailureCount) + .add("State", mState) + .add("BatchSize", BATCH_SIZE) + .add("FailedReason", mFailedReason) + .add("FileIterator", mFileIterator) + .add("CurrentFile", mCurrentFile) + .add("BlockIterator", mBlockIterator) + .add("EndTime", mEndTime) + .toString(); + } + + /** + * @return journal entry of job + */ + public Journal.JournalEntry toJournalEntry() { + Job.LoadJobEntry.Builder jobEntry = Job.LoadJobEntry + .newBuilder() + .setLoadPath(mPath) + .setState(LoadJobState.toProto(mState)) + .setPartialListing(mUsePartialListing) + .setVerify(mVerificationEnabled) + .setJobId(mJobId); + mUser.ifPresent(jobEntry::setUser); + mBandwidth.ifPresent(jobEntry::setBandwidth); + mEndTime.ifPresent(jobEntry::setEndTime); + return Journal.JournalEntry + .newBuilder() + .setLoadJob(jobEntry.build()) + .build(); + } + + /** + * Get journal entry of the job. + * + * @param loadJobEntry journal entry + * @return journal entry of the job + */ + public static LoadJob fromJournalEntry(Job.LoadJobEntry loadJobEntry) { + LoadJob job = new LoadJob(loadJobEntry.getLoadPath(), + loadJobEntry.hasUser() ? Optional.of(loadJobEntry.getUser()) : Optional.empty(), + loadJobEntry.getJobId(), + loadJobEntry.hasBandwidth() ? OptionalLong.of(loadJobEntry.getBandwidth()) : + OptionalLong.empty(), loadJobEntry.getPartialListing(), loadJobEntry.getVerify()); + job.setJobState(LoadJobState.fromProto(loadJobEntry.getState())); + if (loadJobEntry.hasEndTime()) { + job.setEndTime(loadJobEntry.getEndTime()); + } + return job; + } + + /** + * Get duration in seconds. 
+ * @return job duration in seconds + */ + @VisibleForTesting + public long getDurationInSec() { + return (mEndTime.orElse(System.currentTimeMillis()) - mStartTime) / 1000; + } + + private class FileIterator implements Iterator { + private final ListStatusPOptions.Builder mListOptions = + ListStatusPOptions.newBuilder().setRecursive(true); + private static final int PARTIAL_LISTING_BATCH_SIZE = 100; + private final FileSystemMaster mFileSystemMaster; + private final String mPath; + private final Optional mUser; + private final boolean mUsePartialListing; + private String mStartAfter = ""; + private List mFiles; + private Iterator mFileInfoIterator; + + public FileIterator(FileSystemMaster fileSystemMaster, String path, + Optional user, boolean usePartialListing) { + mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mUsePartialListing = usePartialListing; + if (usePartialListing) { + partialListFileInfos(); + } else { + listFileInfos(ListStatusContext.create(mListOptions)); + } + } + + @Override + public boolean hasNext() + { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.hasNext(); + } + + @Override + public FileInfo next() + { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.next(); + } + + private void partialListFileInfos() { + ListStatusContext context = ListStatusContext.create(ListStatusPartialPOptions.newBuilder() + .setOptions(mListOptions) + .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) + .setStartAfter(mStartAfter)); + listFileInfos(context); + if (mFiles.size() > 0) { + mStartAfter = mFiles.get(mFiles.size() - 1).getPath(); + } + } + + private void listFileInfos(ListStatusContext context) { + try { + AuthenticatedClientUser.set(mUser.orElse(null)); + mFiles = mFileSystemMaster.listStatus(new 
AlluxioURI(mPath), context).stream().filter( + fileInfo -> !fileInfo.isFolder() && fileInfo.isCompleted() + && fileInfo.getInAlluxioPercentage() != 100).collect(Collectors.toList()); + mFileInfoIterator = mFiles.iterator(); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } finally { + AuthenticatedClientUser.remove(); + } + List fileInfoStream = mFiles + .stream().filter(fileInfo -> !mFailedFiles.containsKey(fileInfo.getPath())).collect( + Collectors.toList()); + mTotalFileCount.addAndGet(fileInfoStream.size()); + mTotalByteCount.addAndGet(fileInfoStream.stream() + .map(FileInfo::getFileBlockInfos) + .flatMap(Collection::stream) + .map(FileBlockInfo::getBlockInfo) + .filter(blockInfo -> blockInfo.getLocations().isEmpty()) + .map(BlockInfo::getLength) + .reduce(Long::sum) + .orElse(0L)); + } + } + + private static class LoadProgressReport { + private final boolean mVerbose; + private final LoadJobState mJobState; + private final Long mBandwidth; + private final boolean mVerificationEnabled; + private final long mProcessedFileCount; + private final Long mTotalFileCount; + private final long mLoadedByteCount; + private final Long mTotalByteCount; + private final Long mThroughput; + private final double mFailurePercentage; + private final AlluxioRuntimeException mFailureReason; + private final long mFailedFileCount; + private final Map mFailedFilesWithReasons; + + public LoadProgressReport(LoadJob job, boolean verbose) + { + mVerbose = verbose; + mJobState = job.mState; + mBandwidth = job.mBandwidth.isPresent() ? 
job.mBandwidth.getAsLong() : null; + mVerificationEnabled = job.mVerificationEnabled; + mProcessedFileCount = job.mProcessedFileCount.get(); + mLoadedByteCount = job.mLoadedByteCount.get(); + if (job.mFileIterator.isPresent() && !job.mFileIterator.get().mUsePartialListing) { + mTotalFileCount = job.mTotalFileCount.get(); + mTotalByteCount = job.mTotalByteCount.get(); + } + else { + mTotalFileCount = null; + mTotalByteCount = null; + } + long duration = job.getDurationInSec(); + if (duration > 0) { + mThroughput = job.mLoadedByteCount.get() / duration; + } + else { + mThroughput = null; + } + long blockCount = job.mTotalBlockCount.get() + job.mCurrentBlockCount.get(); + if (blockCount > 0) { + mFailurePercentage = + ((double) (job.mTotalFailureCount.get() + job.mCurrentFailureCount.get()) / blockCount) + * 100; + } + else { + mFailurePercentage = 0; + } + mFailureReason = job.mFailedReason.orElse(null); + mFailedFileCount = job.mFailedFiles.size(); + if (verbose && mFailedFileCount > 0) { + mFailedFilesWithReasons = job.mFailedFiles; + } else { + mFailedFilesWithReasons = null; + } + } + + public String getReport(LoadProgressReportFormat format) + { + switch (format) { + case TEXT: + return getTextReport(); + case JSON: + return getJsonReport(); + default: + throw new InvalidArgumentRuntimeException( + String.format("Unknown load progress report format: %s", format)); + } + } + + private String getTextReport() { + StringBuilder progress = new StringBuilder(); + progress.append( + String.format("\tSettings:\tbandwidth: %s\tverify: %s%n", + mBandwidth == null ? "unlimited" : mBandwidth, + mVerificationEnabled)); + progress.append(String.format("\tJob State: %s%s%n", mJobState, + mFailureReason == null + ? 
"" : String.format( + " (%s: %s)", + mFailureReason.getClass().getName(), + mFailureReason.getMessage()))); + if (mVerbose && mFailureReason != null) { + for (StackTraceElement stack : mFailureReason.getStackTrace()) { + progress.append(String.format("\t\t%s%n", stack.toString())); + } + } + progress.append(String.format("\tFiles Processed: %d%s%n", mProcessedFileCount, + mTotalFileCount == null + ? "" : String.format(" out of %s", mTotalFileCount))); + progress.append(String.format("\tBytes Loaded: %s%s%n", + FormatUtils.getSizeFromBytes(mLoadedByteCount), + mTotalByteCount == null + ? "" : String.format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); + if (mThroughput != null) { + progress.append(String.format("\tThroughput: %s/s%n", + FormatUtils.getSizeFromBytes(mThroughput))); + } + progress.append(String.format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); + progress.append(String.format("\tFiles Failed: %s%n", mFailedFileCount)); + if (mVerbose && mFailedFilesWithReasons != null) { + mFailedFilesWithReasons.forEach((fileName, reason) -> + progress.append(String.format("\t\t%s: %s%n", fileName, reason))); + } + return progress.toString(); + } + + private String getJsonReport() { + try { + return new ObjectMapper() + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY) + .setSerializationInclusion(JsonInclude.Include.NON_NULL) + .writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new InternalRuntimeException("Failed to convert LoadProgressReport to JSON", e); + } + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java new file mode 100644 index 000000000000..e7a45dbb001e --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java @@ -0,0 +1,71 @@ +/* + * The Alluxio Open Foundation licenses this work under the 
Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.loadmanager; + +import alluxio.proto.journal.Job; + +/** + * Load status. + */ +public enum LoadJobState +{ + LOADING, + VERIFYING, + STOPPED, + SUCCEEDED, + FAILED; + + /** + * Convert LoadStatus to PJobStatus. + * + * @param state load job state + * @return the corresponding PJobStatus + */ + public static Job.PJobState toProto(LoadJobState state) + { + switch (state) { + case LOADING: + case VERIFYING: + return Job.PJobState.CREATED; + case STOPPED: + return Job.PJobState.STOPPED; + case SUCCEEDED: + return Job.PJobState.SUCCEEDED; + case FAILED: + return Job.PJobState.FAILED; + default: + throw new IllegalArgumentException(String.format("Unknown state %s", state)); + } + } + + /** + * Convert PJobStatus to LoadStatus. 
+ * + * @param jobStatus protobuf job status + * @return the corresponding LoadStatus + */ + public static LoadJobState fromProto(Job.PJobState jobStatus) + { + switch (jobStatus) { + case CREATED: + return LOADING; + case STOPPED: + return STOPPED; + case SUCCEEDED: + return SUCCEEDED; + case FAILED: + return FAILED; + default: + throw new IllegalArgumentException(String.format("Unknown job status %s", jobStatus)); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java new file mode 100644 index 000000000000..d26a396569da --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java @@ -0,0 +1,572 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.loadmanager; + +import static java.lang.String.format; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.client.file.FileSystemContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.grpc.Block; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.LoadProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.grpc.UfsReadOptions; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.CheckAccessContext; +import alluxio.master.journal.JournalContext; +import alluxio.master.journal.Journaled; +import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.proto.journal.Job; +import alluxio.proto.journal.Journal; +import alluxio.resource.CloseableIterator; +import alluxio.resource.CloseableResource; +import alluxio.security.User; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.ThreadUtils; +import alluxio.wire.WorkerInfo; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.google.common.annotations.VisibleForTesting; +import 
com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; +import com.google.common.util.concurrent.ListenableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +/** + * The Load manager which controls load operations. It's not thread-safe since the start and stop + * methods are not thread-safe. But we should only have one thread call these two methods. + */ +@ThreadSafe +public final class LoadManager implements Journaled { + private static final Logger LOG = LoggerFactory.getLogger(LoadManager.class); + private static final int CAPACITY = 100; + private static final long WORKER_UPDATE_INTERVAL = Configuration.getMs( + PropertyKey.MASTER_WORKER_INFO_CACHE_REFRESH_TIME); + private static final int EXECUTOR_SHUTDOWN_MS = 10 * Constants.SECOND_MS; + private final FileSystemMaster mFileSystemMaster; + private final FileSystemContext mContext; + private final Map<String, LoadJob> mLoadJobs = new ConcurrentHashMap<>(); + private final Map<LoadJob, Set<WorkerInfo>> mRunningTasks = new ConcurrentHashMap<>(); + // initialized in the start method since we stop and start the thread when gaining primacy + private ScheduledExecutorService mLoadScheduler; + private volatile boolean mRunning = false; + private Map<WorkerInfo, CloseableResource<BlockWorkerClient>> mActiveWorkers = ImmutableMap.of(); + + /** + * Constructor. 
+ * @param fileSystemMaster fileSystemMaster + */ + public LoadManager(FileSystemMaster fileSystemMaster) { + this(fileSystemMaster, FileSystemContext.create()); + } + + /** + * Constructor. + * @param fileSystemMaster fileSystemMaster + * @param context fileSystemContext + */ + @VisibleForTesting + public LoadManager(FileSystemMaster fileSystemMaster, FileSystemContext context) { + mFileSystemMaster = fileSystemMaster; + mContext = context; + } + + /** + * Start load manager. + */ + public void start() { + if (!mRunning) { + mLoadScheduler = Executors.newSingleThreadScheduledExecutor( + ThreadFactoryUtils.build("load-manager-scheduler", false)); + mLoadScheduler.scheduleAtFixedRate(this::updateWorkers, 0, WORKER_UPDATE_INTERVAL, + TimeUnit.MILLISECONDS); + mLoadScheduler.scheduleWithFixedDelay(this::processJobs, 0, 100, TimeUnit.MILLISECONDS); + mLoadScheduler.scheduleWithFixedDelay(this::cleanupStaleJob, 1, 1, TimeUnit.HOURS); + mRunning = true; + } + } + + /** + * Stop load manager. + */ + public void stop() { + if (mRunning) { + mActiveWorkers.values().forEach(CloseableResource::close); + mActiveWorkers = ImmutableMap.of(); + ThreadUtils.shutdownAndAwaitTermination(mLoadScheduler, EXECUTOR_SHUTDOWN_MS); + mRunning = false; + } + } + + /** + * Submit a load job. 
+ * @param loadPath alluxio directory path to load into Alluxio + * @param bandwidth bandwidth allocated to this load + * @param usePartialListing whether to use partial listing or not + * @param verificationEnabled whether to run verification step or not + * @return true if the job is new, false if the job has already been submitted + */ + public boolean submitLoad(String loadPath, OptionalLong bandwidth, + boolean usePartialListing, boolean verificationEnabled) { + try { + mFileSystemMaster.checkAccess(new AlluxioURI(loadPath), CheckAccessContext.defaults()); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } + return submitLoad(new LoadJob( + loadPath, + Optional.ofNullable(AuthenticatedClientUser.getOrNull()).map(User::getName), UUID + .randomUUID().toString(), bandwidth, + usePartialListing, + verificationEnabled)); + } + + /** + * Submit a load job. 
+ * @param loadJob the load job + * @return true if the job is new, false if the job has already been submitted + */ + @VisibleForTesting + public boolean submitLoad(LoadJob loadJob) { + LoadJob existingJob = mLoadJobs.get(loadJob.getPath()); + if (existingJob != null && !existingJob.isDone()) { + updateExistingJob(loadJob, existingJob); + return false; + } + + if (mRunningTasks.size() >= CAPACITY) { + throw new ResourceExhaustedRuntimeException( + "Too many load jobs running, please submit later.", true); + } + writeJournal(loadJob); + mLoadJobs.put(loadJob.getPath(), loadJob); + mRunningTasks.put(loadJob, new HashSet<>()); + LOG.debug(format("start job: %s", loadJob)); + return true; + } + + private void updateExistingJob(LoadJob loadJob, LoadJob existingJob) { + existingJob.updateBandwidth(loadJob.getBandwidth()); + existingJob.setVerificationEnabled(loadJob.isVerificationEnabled()); + writeJournal(existingJob); + LOG.debug(format("updated existing job: %s from %s", existingJob, loadJob)); + if (existingJob.getJobState() == LoadJobState.STOPPED) { + existingJob.setJobState(LoadJobState.LOADING); + mRunningTasks.put(existingJob, new HashSet<>()); + } + } + + /** + * Stop a load job. + * @param loadPath alluxio directory path to load into Alluxio + * @return true if the job is stopped, false if the job does not exist or has already finished + */ + public boolean stopLoad(String loadPath) { + LoadJob existingJob = mLoadJobs.get(loadPath); + if (existingJob != null && existingJob.isRunning()) { + existingJob.setJobState(LoadJobState.STOPPED); + writeJournal(existingJob); + // leftover tasks in mLoadTasks would be removed by scheduling thread. + return true; + } + return false; + } + + /** + * Get the load job's progress report. 
+ * @param loadPath alluxio directory path of the load job + * @param format progress report format + * @param verbose whether to include details on failed files and failures + * @return the progress report + */ + public String getLoadProgress( + String loadPath, + LoadProgressReportFormat format, + boolean verbose) { + LoadJob job = mLoadJobs.get(loadPath); + if (job == null) { + throw new NotFoundRuntimeException(format("Load for path %s cannot be found.", loadPath)); + } + return job.getProgress(format, verbose); + } + + /** + * Get active workers. + * @return active workers + */ + @VisibleForTesting + public Map> getActiveWorkers() { + return mActiveWorkers; + } + + /** + * Removes all finished jobs outside the retention time. + */ + @VisibleForTesting + public void cleanupStaleJob() { + long current = System.currentTimeMillis(); + mLoadJobs.entrySet().removeIf(job -> !job.getValue().isRunning() + && job.getValue().getEndTime().isPresent() + && job.getValue().getEndTime().getAsLong() <= (current - Configuration.getMs( + PropertyKey.JOB_RETENTION_TIME))); + } + + /** + * Refresh active workers. 
+ */ + @VisibleForTesting + public void updateWorkers() { + if (Thread.currentThread().isInterrupted()) { + return; + } + Set workerInfos; + try { + try { + // TODO(jianjian): need api for healthy worker instead + workerInfos = ImmutableSet.copyOf(mFileSystemMaster.getWorkerInfoList()); + } catch (UnavailableException e) { + LOG.warn("Failed to get worker info, using existing worker infos of {} workers", + mActiveWorkers.size()); + return; + } + if (workerInfos.size() == mActiveWorkers.size() + && workerInfos.containsAll(mActiveWorkers.keySet())) { + return; + } + + ImmutableMap.Builder> updatedWorkers = + ImmutableMap.builder(); + for (WorkerInfo workerInfo : workerInfos) { + if (mActiveWorkers.containsKey(workerInfo)) { + updatedWorkers.put(workerInfo, mActiveWorkers.get(workerInfo)); + } + else { + try { + updatedWorkers.put(workerInfo, + mContext.acquireBlockWorkerClient(workerInfo.getAddress())); + } catch (IOException e) { + // skip the worker if we cannot obtain a client + } + } + } + // Close clients connecting to lost workers + for (Map.Entry> entry : + mActiveWorkers.entrySet()) { + WorkerInfo workerInfo = entry.getKey(); + if (!workerInfos.contains(workerInfo)) { + CloseableResource resource = entry.getValue(); + resource.close(); + LOG.debug("Closed BlockWorkerClient to lost worker {}", workerInfo); + } + } + // Build the clients to the current active worker list + mActiveWorkers = updatedWorkers.build(); + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. + LOG.error("Unexpected exception thrown in updateWorkers.", e); + } + } + + /** + * Get load jobs. 
+ * @return load jobs + */ + @VisibleForTesting + public Map<String, LoadJob> getLoadJobs() { + return mLoadJobs; + } + + private void processJobs() { + if (Thread.currentThread().isInterrupted()) { + return; + } + mRunningTasks.forEach(this::processJob); + } + + private void processJob(LoadJob loadJob, Set<WorkerInfo> loadWorkers) { + try { + if (!loadJob.isRunning()) { + try { + writeJournal(loadJob); + } + catch (UnavailableRuntimeException e) { + // This should not happen because the load manager should not be started while master is + // still processing journal entries. However, if it does happen, we don't want to throw + // exception in a task running on scheduler thread. So just ignore it and hopefully later + // retry will work. + LOG.error("error writing to journal when processing job", e); + } + mRunningTasks.remove(loadJob); + return; + } + if (!loadJob.isHealthy()) { + loadJob.failJob(new InternalRuntimeException("Too many block load failed.")); + return; + } + + // If there are new workers, schedule job onto new workers + mActiveWorkers.forEach((workerInfo, workerClient) -> { + if (!loadWorkers.contains(workerInfo) && scheduleBatch(loadJob, workerInfo, loadWorkers, + workerClient, loadJob.getBatchSize())) { + loadWorkers.add(workerInfo); + } + }); + + if (loadWorkers.isEmpty() && loadJob.isCurrentLoadDone()) { + if (loadJob.getCurrentBlockCount() > 0 && loadJob.isVerificationEnabled()) { + loadJob.initiateVerification(); + } + else { + if (loadJob.isHealthy()) { + loadJob.setJobState(LoadJobState.SUCCEEDED); + JOB_LOAD_SUCCESS.inc(); + } + else { + loadJob.failJob(new InternalRuntimeException("Too many block load failed.")); + } + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. 
+ LOG.error("Unexpected exception thrown in processJob.", e); + loadJob.failJob(new InternalRuntimeException(e)); + } + } + + // Returns false if the whole task fails + private boolean processResponse( + LoadJob load, + LoadRequest request, + ListenableFuture responseFuture) { + try { + long totalBytes = request.getBlocksList().stream() + .map(Block::getLength) + .reduce(Long::sum) + .orElse(0L); + LoadResponse response = responseFuture.get(); + if (response.getStatus() != TaskStatus.SUCCESS) { + LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); + for (BlockStatus status : response.getBlockStatusList()) { + totalBytes -= status.getBlock().getLength(); + if (!load.isHealthy() || !status.getRetryable() || !load.addBlockToRetry( + status.getBlock())) { + load.addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); + } + } + } + load.addLoadedBytes(totalBytes); + JOB_LOAD_BLOCK_COUNT.inc( + request.getBlocksCount() - response.getBlockStatusCount()); + JOB_LOAD_BLOCK_SIZE.inc(totalBytes); + JOB_LOAD_RATE.mark(totalBytes); + return response.getStatus() != TaskStatus.FAILURE; + } + catch (ExecutionException e) { + LOG.warn("exception when trying to get load response.", e.getCause()); + for (Block block : request.getBlocksList()) { + if (load.isHealthy()) { + load.addBlockToRetry(block); + } + else { + AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); + load.addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() + .value()); + } + } + return false; + } + catch (CancellationException e) { + LOG.warn("Task get canceled and will retry.", e); + request.getBlocksList().forEach(load::addBlockToRetry); + return true; + } + catch (InterruptedException e) { + request.getBlocksList().forEach(load::addBlockToRetry); + Thread.currentThread().interrupt(); + // We don't count InterruptedException as task failure + return true; + } + } + + private boolean scheduleBatch( + LoadJob load, 
+ WorkerInfo workerInfo, + Set loadWorkers, + CloseableResource workerClient, + int batchSize) { + if (!load.isRunning()) { + return false; + } + List batch; + try { + batch = load.getNextBatch(mFileSystemMaster, batchSize); + } catch (AlluxioRuntimeException e) { + LOG.warn(format("error getting next batch for load %s", load), e); + if (!e.isRetryable()) { + load.failJob(e); + } + return false; + } + + if (batch.isEmpty()) { + return false; + } + + LoadRequest request = buildRequest(batch, load.getUser(), load.getJobId(), load.getBandwidth()); + ListenableFuture responseFuture = workerClient.get().load(request); + responseFuture.addListener(() -> { + try { + if (!processResponse(load, request, responseFuture)) { + loadWorkers.remove(workerInfo); + } + // Schedule next batch for healthy job + if (load.isHealthy()) { + if (mActiveWorkers.containsKey(workerInfo)) { + if (!scheduleBatch(load, workerInfo, loadWorkers, mActiveWorkers.get(workerInfo), + load.getBatchSize())) { + loadWorkers.remove(workerInfo); + } + } + else { + loadWorkers.remove(workerInfo); + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. 
+ LOG.error("Unexpected exception thrown in response future listener.", e); + load.failJob(new InternalRuntimeException(e)); + } + }, mLoadScheduler); + return true; + } + + private void writeJournal(LoadJob job) { + try (JournalContext context = mFileSystemMaster.createJournalContext()) { + context.append(job.toJournalEntry()); + } catch (UnavailableException e) { + throw new UnavailableRuntimeException( + "There is an ongoing backup running, please submit later", e); + } + } + + private LoadRequest buildRequest(List blockBatch, Optional user, String tag, + OptionalLong bandwidth) { + LoadRequest.Builder request = LoadRequest + .newBuilder() + .addAllBlocks(blockBatch); + UfsReadOptions.Builder options = + UfsReadOptions.newBuilder().setTag(tag).setPositionShort(false); + if (bandwidth.isPresent()) { + options.setBandwidth(bandwidth.getAsLong() / mActiveWorkers.size()); + } + user.ifPresent(options::setUser); + return request.setOptions(options.build()).build(); + } + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable( + Iterators.transform(mLoadJobs.values().iterator(), LoadJob::toJournalEntry)); + } + + @Override + public boolean processJournalEntry(Journal.JournalEntry entry) { + if (!entry.hasLoadJob()) { + return false; + } + Job.LoadJobEntry loadJobEntry = entry.getLoadJob(); + LoadJob job = LoadJob.fromJournalEntry(loadJobEntry); + mLoadJobs.put(loadJobEntry.getLoadPath(), job); + if (job.isDone()) { + mRunningTasks.remove(job); + } + else { + mRunningTasks.put(job, new HashSet<>()); + } + return true; + } + + @Override + public void resetState() + { + mLoadJobs.clear(); + mRunningTasks.clear(); + } + + @Override + public CheckpointName getCheckpointName() + { + return CheckpointName.LOAD_MANAGER; + } + + // metrics + public static final Counter JOB_LOAD_SUCCESS = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); + public static final Counter JOB_LOAD_FAIL = + 
MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_COUNT = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); + public static final Counter JOB_LOAD_BLOCK_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_SIZE = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); + public static final Meter JOB_LOAD_RATE = + MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); +} diff --git a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java new file mode 100644 index 000000000000..0efb37818c35 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java @@ -0,0 +1,142 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.loadmanager; + +import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomFileInfo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.grpc.Block; +import alluxio.grpc.LoadProgressReportFormat; +import alluxio.master.file.FileSystemMaster; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.OptionalLong; + +public class LoadJobTest { + @Test + public void testGetNextBatch() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List fileInfos = generateRandomFileInfo(5, 20, 64 * Constants.MB); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())) + .thenReturn(fileInfos); + LoadJob load = new LoadJob("test", "user", OptionalLong.empty()); + List batch = load.getNextBatch(fileSystemMaster, 10); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch.forEach(load::addBlockToRetry); + + batch = load.getNextBatch(fileSystemMaster, 80); + assertEquals(80, batch.size()); + assertEquals(5, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatch(fileSystemMaster, 80); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = 
load.getNextBatch(fileSystemMaster, 80); + assertEquals(10, batch.size()); + assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); + assertEquals(ImmutableSet.of(fileInfos.get(0).getUfsPath()), + batch.stream().map(Block::getUfsPath).collect(ImmutableSet.toImmutableSet())); + + batch = load.getNextBatch(fileSystemMaster, 80); + assertEquals(0, batch.size()); + } + + @Test + public void testIsHealthy() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List fileInfos = generateRandomFileInfo(100, 5, 64 * 1024 * 1024); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())) + .thenReturn(fileInfos); + LoadJob loadJob = new LoadJob("test", "user", OptionalLong.empty()); + List batch = loadJob.getNextBatch(fileSystemMaster, 100); + assertTrue(loadJob.isHealthy()); + loadJob.getNextBatch(fileSystemMaster, 100); + assertTrue(loadJob.isHealthy()); + batch.forEach(loadJob::addBlockToRetry); + assertTrue(loadJob.isHealthy()); + batch = loadJob.getNextBatch(fileSystemMaster, 100); + assertTrue(loadJob.isHealthy()); + batch.forEach(loadJob::addBlockToRetry); + assertFalse(loadJob.isHealthy()); + } + + @Test + public void testLoadProgressReport() throws Exception { + List fileInfos = generateRandomFileInfo(10, 10, 64 * Constants.MB); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())) + .thenReturn(fileInfos); + LoadJob job = spy(new LoadJob("/test", "user", OptionalLong.empty())); + when(job.getDurationInSec()).thenReturn(0L); + job.setJobState(LoadJobState.LOADING); + List blocks = job.getNextBatch(fileSystemMaster, 25); + job.addLoadedBytes(640 * Constants.MB); + String expectedTextReport = "\tSettings:\tbandwidth: unlimited\tverify: false\n" + + "\tJob State: LOADING\n" + + "\tFiles Processed: 3 out of 10\n" + + "\tBytes Loaded: 640.00MB out of 6.25GB\n" + + "\tBlock load failure 
rate: 0.00%\n" + + "\tFiles Failed: 0\n"; + assertEquals(expectedTextReport, job.getProgress(LoadProgressReportFormat.TEXT, false)); + assertEquals(expectedTextReport, job.getProgress(LoadProgressReportFormat.TEXT, true)); + String expectedJsonReport = "{\"mVerbose\":false,\"mJobState\":\"LOADING\"," + + "\"mVerificationEnabled\":false,\"mProcessedFileCount\":3,\"mTotalFileCount\":10," + + "\"mLoadedByteCount\":671088640,\"mTotalByteCount\":6710886400," + + "\"mFailurePercentage\":0.0,\"mFailedFileCount\":0}"; + assertEquals(expectedJsonReport, job.getProgress(LoadProgressReportFormat.JSON, false)); + job.addBlockFailure(blocks.get(0), "Test error 1", 2); + job.addBlockFailure(blocks.get(4), "Test error 2", 2); + job.addBlockFailure(blocks.get(10), "Test error 3", 2); + job.failJob(new InternalRuntimeException("test")); + String expectedTextReportWithError = "\tSettings:\tbandwidth: unlimited\tverify: false\n" + + "\tJob State: FAILED (alluxio.exception.runtime.InternalRuntimeException: test)\n" + + "\tFiles Processed: 3 out of 10\n" + + "\tBytes Loaded: 640.00MB out of 6.25GB\n" + + "\tBlock load failure rate: 12.00%\n" + + "\tFiles Failed: 2\n"; + assertEquals(expectedTextReportWithError, + job.getProgress(LoadProgressReportFormat.TEXT, false)); + String textReport = job.getProgress(LoadProgressReportFormat.TEXT, true); + assertFalse(textReport.contains("Test error 1")); + assertTrue(textReport.contains("Test error 2")); + assertTrue(textReport.contains("Test error 3")); + String jsonReport = job.getProgress(LoadProgressReportFormat.JSON, false); + assertTrue(jsonReport.contains("FAILED")); + assertTrue(jsonReport.contains("mFailureReason")); + assertFalse(jsonReport.contains("Test error 2")); + jsonReport = job.getProgress(LoadProgressReportFormat.JSON, true); + assertFalse(jsonReport.contains("Test error 1")); + assertTrue(jsonReport.contains("Test error 2")); + assertTrue(jsonReport.contains("Test error 3")); + } +} diff --git 
a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java new file mode 100644 index 000000000000..89ea0057d4c4 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java @@ -0,0 +1,430 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.loadmanager; + +import static alluxio.master.file.loadmanager.LoadTestUtils.fileWithBlockLocations; +import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomBlockStatus; +import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomFileInfo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.client.file.FileSystemContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.conf.Source; +import alluxio.exception.AccessControlException; +import 
alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.LoadProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.journal.JournalContext; +import alluxio.proto.journal.Job; +import alluxio.resource.CloseableResource; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.wire.FileInfo; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.SettableFuture; +import io.grpc.Status; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.IntStream; + +public final class LoadManagerTest { + + @BeforeClass + public static void before() { + AuthenticatedClientUser.set("user"); + } + + @AfterClass + public static void after() { + AuthenticatedClientUser.remove(); + } + + @Test + public void testGetActiveWorkers() throws IOException { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + CloseableResource blockWorkerClient = mock(CloseableResource.class); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + when(fileSystemMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new 
WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenThrow(new UnavailableException("test")) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))); + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClient); + assertEquals(0, loadManager.getActiveWorkers().size()); + loadManager.updateWorkers(); + assertEquals(2, loadManager.getActiveWorkers().size()); + loadManager.updateWorkers(); + assertEquals(2, loadManager.getActiveWorkers().size()); + loadManager.updateWorkers(); + assertEquals(1, loadManager.getActiveWorkers().size()); + loadManager.updateWorkers(); + assertEquals(2, loadManager.getActiveWorkers().size()); + } + + @Test + public void testSubmit() throws Exception { + String validLoadPath = "/path/to/load"; + String invalidLoadPath = "/path/to/invalid"; + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + assertTrue(loadManager.submitLoad(validLoadPath, OptionalLong.empty(), false, true)); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && !journalEntry.getLoadJob().hasBandwidth() + && 
journalEntry.getLoadJob().getVerify())); + assertEquals(1, loadManager.getLoadJobs().size()); + assertEquals(OptionalLong.empty(), loadManager.getLoadJobs().get(validLoadPath).getBandwidth()); + assertTrue(loadManager.getLoadJobs().get(validLoadPath).isVerificationEnabled()); + assertFalse(loadManager.submitLoad(validLoadPath, OptionalLong.of(1000), true, false)); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 1000 + && !journalEntry.getLoadJob().getPartialListing() // we don't update partialListing + && !journalEntry.getLoadJob().getVerify())); + assertEquals(1, loadManager.getLoadJobs().size()); + assertEquals(1000, loadManager.getLoadJobs().get(validLoadPath).getBandwidth().getAsLong()); + assertFalse(loadManager.getLoadJobs().get(validLoadPath).isVerificationEnabled()); + doThrow(new FileDoesNotExistException("test")).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(NotFoundRuntimeException.class, + () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); + doThrow(new InvalidPathException("test")).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(NotFoundRuntimeException.class, + () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); + doThrow(new AccessControlException("test")).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(UnauthenticatedRuntimeException.class, + () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); + } + + @Test + public void testStop() throws Exception { + String validLoadPath = "/path/to/load"; + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + 
when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + assertTrue(loadManager.submitLoad(validLoadPath, OptionalLong.of(100), false, true)); + verify(journalContext, times(1)).append(any()); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 100 + && journalEntry.getLoadJob().getVerify())); + assertTrue(loadManager.stopLoad(validLoadPath)); + verify(journalContext, times(2)).append(any()); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) + && journalEntry.getLoadJob().getState() == Job.PJobState.STOPPED + && journalEntry.getLoadJob().getBandwidth() == 100 + && journalEntry.getLoadJob().getVerify() + && journalEntry.getLoadJob().hasEndTime())); + assertFalse(loadManager.stopLoad(validLoadPath)); + verify(journalContext, times(2)).append(any()); + assertFalse(loadManager.stopLoad("/does/not/exist")); + verify(journalContext, times(2)).append(any()); + assertFalse(loadManager.submitLoad(validLoadPath, OptionalLong.of(100), false, true)); + verify(journalContext, times(3)).append(any()); + assertTrue(loadManager.stopLoad(validLoadPath)); + verify(journalContext, times(4)).append(any()); + } + + @Test + public void testSubmitExceedsCapacity() throws Exception { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + IntStream.range(0, 100).forEach( + i -> 
assertTrue(loadManager.submitLoad( + String.format("/path/to/load/%d", i), OptionalLong.empty(), false, true))); + assertThrows( + ResourceExhaustedRuntimeException.class, + () -> loadManager.submitLoad("/path/to/load/101", OptionalLong.empty(), false, true)); + } + + @Test + public void testScheduling() throws Exception { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + when(fileSystemMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)), + new WorkerInfo().setId(3).setAddress( + new WorkerNetAddress().setHost("worker3").setRpcPort(1234)), + new WorkerInfo().setId(4).setAddress( + new WorkerNetAddress().setHost("worker4").setRpcPort(1234)), + new WorkerInfo().setId(5).setAddress( + new WorkerNetAddress().setHost("worker5").setRpcPort(1234)), + new WorkerInfo().setId(6).setAddress( + new WorkerNetAddress().setHost("worker6").setRpcPort(1234)), + new WorkerInfo().setId(7).setAddress( + new WorkerNetAddress().setHost("worker7").setRpcPort(1234)), + new WorkerInfo().setId(8).setAddress( + new 
WorkerNetAddress().setHost("worker8").setRpcPort(1234)), + new WorkerInfo().setId(9).setAddress( + new WorkerNetAddress().setHost("worker9").setRpcPort(1234)), + new WorkerInfo().setId(10).setAddress( + new WorkerNetAddress().setHost("worker10").setRpcPort(1234)))); + List fileInfos = generateRandomFileInfo(100, 50, 64 * Constants.MB); + when(fileSystemMaster.listStatus(any(), any())) + .thenReturn(fileInfos) + .thenReturn(fileWithBlockLocations(fileInfos, 0.95)) + .thenReturn(fileWithBlockLocations(fileInfos, 1.1)); + int failureRequestIteration = 50; + int exceptionRequestIteration = 70; + AtomicInteger iteration = new AtomicInteger(); + + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + iteration.getAndIncrement(); + LoadRequest request = invocation.getArgument(0); + List status; + if (iteration.get() == exceptionRequestIteration) { + // Test worker exception + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.setException(new TimeoutException()); + return responseFuture; + } + else if (iteration.get() == failureRequestIteration) { + // Test worker failing the whole request + status = generateRandomBlockStatus(request.getBlocksList(), 1); + } + else { + status = generateRandomBlockStatus(request.getBlocksList(), 0.01); + } + LoadResponse.Builder response = LoadResponse.newBuilder(); + if (status.stream().allMatch(s -> s.getCode() == Status.OK.getCode().value())) { + response.setStatus(TaskStatus.SUCCESS); + } + else if (status.stream().allMatch(s -> s.getCode() != Status.OK.getCode().value())) { + response.setStatus(TaskStatus.FAILURE) + .addAllBlockStatus(status); + } + else { + response.setStatus(TaskStatus.PARTIAL_FAILURE) + .addAllBlockStatus(status.stream() + .filter(s -> s.getCode() != Status.OK.getCode().value()) + 
.collect(ImmutableList.toImmutableList())); + } + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; + }); + + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + LoadJob loadJob = new LoadJob("test", Optional.of("user"), "1", + OptionalLong.of(1000), false, true); + loadManager.submitLoad(loadJob); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals("test") + && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED + && journalEntry.getLoadJob().getBandwidth() == 1000 + && journalEntry.getLoadJob().getVerify())); + loadManager.start(); + while (!loadManager + .getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + .contains("SUCCEEDED")) { + assertFalse(loadManager.submitLoad( + new LoadJob("test", Optional.of("user"), "1", OptionalLong.of(1000), false, true))); + Thread.sleep(1000); + } + Thread.sleep(1000); + loadManager.stop(); + assertEquals(LoadJobState.SUCCEEDED, loadJob.getJobState()); + assertEquals(0, loadJob.getCurrentBlockCount()); + assertTrue(loadJob.getTotalBlockCount() > 5000); + verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() + && journalEntry.getLoadJob().getLoadPath().equals("test") + && journalEntry.getLoadJob().getState() == Job.PJobState.SUCCEEDED + && journalEntry.getLoadJob().getBandwidth() == 1000 + && journalEntry.getLoadJob().getVerify())); + assertTrue(loadManager.submitLoad(new LoadJob("test", "user", OptionalLong.of(1000)))); + } + + @Test + public void testSchedulingFullCapacity() throws Exception { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource 
blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + ImmutableList.Builder workerInfos = ImmutableList.builder(); + for (int i = 0; i < 1000; i++) { + workerInfos.add(new WorkerInfo().setId(i).setAddress( + new WorkerNetAddress().setHost("worker" + i).setRpcPort(1234))); + } + when(fileSystemMaster.getWorkerInfoList()) + .thenReturn(workerInfos.build()); + List fileInfos = generateRandomFileInfo(2000, 50, 64 * Constants.MB); + when(fileSystemMaster.listStatus(any(), any())) + .thenReturn(fileInfos); + + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + LoadResponse.Builder response = LoadResponse.newBuilder().setStatus(TaskStatus.SUCCESS); + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; + }); + + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + for (int i = 0; i < 100; i++) { + LoadJob loadJob = new LoadJob("test" + i, "user", OptionalLong.of(1000)); + loadManager.submitLoad(loadJob); + } + assertThrows(ResourceExhaustedRuntimeException.class, + () -> loadManager.submitLoad(new LoadJob("/way/too/many", "user", OptionalLong.empty()))); + loadManager.start(); + while (loadManager.getLoadJobs().values().stream() + .anyMatch(loadJob -> loadJob.getJobState() != LoadJobState.SUCCEEDED)) { + Thread.sleep(1000); + } + loadManager.stop(); + } + + @Test + public void testSchedulingWithException() throws Exception { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + CloseableResource 
blockWorkerClientResource = mock(CloseableResource.class); + BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); + when(fileSystemMaster.getWorkerInfoList()) + .thenReturn(ImmutableList.of( + new WorkerInfo().setId(1).setAddress( + new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), + new WorkerInfo().setId(2).setAddress( + new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))); + when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); + when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + List fileInfos = generateRandomFileInfo(100, 10, 64 * Constants.MB); + when(fileSystemMaster.listStatus(any(), any())) + // Non-retryable exception, first load job should fail + .thenThrow(AccessControlException.class) + // Retryable exception, second load job should succeed + .thenThrow(new ResourceExhaustedRuntimeException("test", true)) + .thenReturn(fileInfos); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + loadManager.start(); + loadManager.submitLoad("test", OptionalLong.of(1000), false, false); + while (!loadManager.getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + .contains("FAILED")) { + Thread.sleep(1000); + } + when(blockWorkerClient.load(any())).thenAnswer(invocation -> { + LoadResponse.Builder response = LoadResponse.newBuilder().setStatus(TaskStatus.SUCCESS); + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; + }); + loadManager.submitLoad("test", OptionalLong.of(1000), false, false); + while (!loadManager.getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + .contains("SUCCEEDED")) { + Thread.sleep(1000); + } + loadManager.stop(); + } + + @Test + public void testJobRetention() throws Exception { + Configuration.modifiableGlobal().set(PropertyKey.JOB_RETENTION_TIME, "0ms", Source.RUNTIME); + FileSystemMaster fileSystemMaster = 
mock(FileSystemMaster.class); + FileSystemContext fileSystemContext = mock(FileSystemContext.class); + JournalContext journalContext = mock(JournalContext.class); + when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + loadManager.start(); + IntStream.range(0, 5).forEach( + i -> assertTrue(loadManager.submitLoad( + String.format("/load/%d", i), OptionalLong.empty(), false, true))); + assertEquals(5, loadManager.getLoadJobs().size()); + loadManager.getLoadJobs().get("/load/1").setJobState(LoadJobState.VERIFYING); + loadManager.getLoadJobs().get("/load/2").setJobState(LoadJobState.FAILED); + loadManager.getLoadJobs().get("/load/3").setJobState(LoadJobState.SUCCEEDED); + loadManager.getLoadJobs().get("/load/4").setJobState(LoadJobState.STOPPED); + loadManager.cleanupStaleJob(); + assertEquals(2, loadManager.getLoadJobs().size()); + assertTrue(loadManager.getLoadJobs().containsKey("/load/0")); + assertTrue(loadManager.getLoadJobs().containsKey("/load/1")); + IntStream.range(2, 5).forEach( + i -> assertFalse(loadManager.getLoadJobs().containsKey(String.format("/load/%d", i)))); + Configuration.modifiableGlobal().unset(PropertyKey.JOB_RETENTION_TIME); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java new file mode 100644 index 000000000000..4b610c6cfe47 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java @@ -0,0 +1,118 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.loadmanager; + +import alluxio.grpc.Block; +import alluxio.grpc.BlockStatus; +import alluxio.util.CommonUtils; +import alluxio.wire.BlockInfo; +import alluxio.wire.BlockLocation; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import io.grpc.Status; + +import java.util.List; +import java.util.Random; +import java.util.stream.LongStream; + +public final class LoadTestUtils { + private LoadTestUtils() {} + + public static List generateRandomBlockStatus( + List blocks, double failureRate) { + ImmutableList.Builder blockStatus = ImmutableList.builder(); + for (Block block : blocks) { + if (Math.random() > failureRate) { + blockStatus.add(BlockStatus.newBuilder() + .setBlock(block) + .setCode(Status.OK.getCode().value()) + .build()); + } + else { + blockStatus.add(BlockStatus.newBuilder() + .setBlock(block) + .setCode((int) (Math.random() * 10) + 1) + .setRetryable(Math.random() > 0.5) + .build()); + } + } + return blockStatus.build(); + } + + public static List fileWithBlockLocations(List files, double ratio) { + ImmutableList.Builder newFiles = ImmutableList.builder(); + files.forEach(fileInfo -> { + ImmutableList.Builder newFileBlockInfo = ImmutableList.builder(); + fileInfo.getFileBlockInfos().forEach(fileBlockInfo -> { + BlockInfo info = new BlockInfo().setBlockId(fileBlockInfo.getBlockInfo().getBlockId()); + if (Math.random() <= ratio) { + info.setLocations(ImmutableList.of(new BlockLocation())); + } + 
newFileBlockInfo.add(new FileBlockInfo() + .setUfsLocations(fileBlockInfo.getUfsLocations()) + .setOffset(fileBlockInfo.getOffset()) + .setBlockInfo(info)); + }); + newFiles.add(new FileInfo() + .setUfsPath(fileInfo.getUfsPath()) + .setBlockSizeBytes(fileInfo.getBlockSizeBytes()) + .setBlockIds(fileInfo.getBlockIds()) + .setCompleted(true) + .setFileBlockInfos(newFileBlockInfo.build())); + }); + return newFiles.build(); + } + + public static List generateRandomFileInfo( + int fileCount, int blockCountPerFile, long blockSizeLimit) { + List fileInfos = Lists.newArrayList(); + for (int i = 0; i < fileCount; i++) { + FileInfo info = createFileInfo(blockCountPerFile, blockSizeLimit); + fileInfos.add(info); + } + return fileInfos; + } + + private static FileInfo createFileInfo(int blockCount, long blockSizeLimit) { + Random random = new Random(); + FileInfo info = new FileInfo(); + String ufs = CommonUtils.randomAlphaNumString(6); + long blockSize = Math.abs(random.nextLong() % blockSizeLimit); + List blockIds = LongStream.range(0, blockCount) + .map(i -> random.nextLong()) + .boxed() + .collect(ImmutableList.toImmutableList()); + info.setUfsPath(ufs) + .setBlockSizeBytes(blockSize) + .setLength(blockSizeLimit * blockCount) + .setBlockIds(blockIds) + .setFileBlockInfos(blockIds + .stream() + .map(id -> LoadTestUtils.createFileBlockInfo(id, blockSizeLimit)) + .collect(ImmutableList.toImmutableList())) + .setCompleted(true); + return info; + } + + private static FileBlockInfo createFileBlockInfo(long id, long length) { + FileBlockInfo fileBlockInfo = new FileBlockInfo(); + BlockInfo blockInfo = new BlockInfo(); + blockInfo.setBlockId(id); + blockInfo.setLength(length); + fileBlockInfo.setBlockInfo(blockInfo); + fileBlockInfo.setOffset(new Random().nextInt(1000)); + return fileBlockInfo; + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java index 
de999c75d748..48f7773d127d 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java @@ -203,6 +203,10 @@ private int readInternal() { int bytesRead = 0; InputStream inStream = null; try (CloseableResource ufsResource = mUfsClient.acquireUfsResource()) { + if (mOptions.hasUser()) { + // Before interacting with ufs manager, set the user. + alluxio.security.authentication.AuthenticatedClientUser.set(mOptions.getUser()); + } inStream = mUfsInstreamCache.acquire(ufsResource.get(), mUfsPath, mFileId, OpenOptions.defaults().setOffset(mOffset) .setPositionShort(mOptions.getPositionShort())); diff --git a/core/transport/src/main/proto/grpc/block_worker.proto b/core/transport/src/main/proto/grpc/block_worker.proto index 3f5d2a5d6667..470c6be90567 100644 --- a/core/transport/src/main/proto/grpc/block_worker.proto +++ b/core/transport/src/main/proto/grpc/block_worker.proto @@ -149,6 +149,7 @@ message UfsReadOptions{ // We introduce a heuristic to choose which API to use. 
required bool position_short = 2; optional int64 bandwidth = 3; + optional string user = 4; } diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index b516e418648f..bb01a79e09f9 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -586,6 +586,48 @@ message NeedsSyncRequest { message NeedsSyncResponse {} +message LoadPathPOptions { + optional int64 bandwidth = 1; + optional bool verify = 2; + optional bool partialListing = 3; +} + +message LoadPathPRequest { + required string path = 1; + optional LoadPathPOptions options = 2; +} + +message LoadPathPResponse { + optional bool newLoadSubmitted = 1; +} + +message StopLoadPathPRequest { + required string path = 1; +} + +message StopLoadPathPResponse { + optional bool existingLoadStopped = 1; +} + +enum LoadProgressReportFormat { + TEXT = 1; + JSON = 2; +} + +message LoadProgressPOptions { + optional LoadProgressReportFormat format = 1; + optional bool verbose = 2; +} + +message GetLoadProgressPRequest { + required string path = 1; + optional LoadProgressPOptions options = 2; +} + +message GetLoadProgressPResponse { + optional string progressReport = 1; + optional LoadProgressReportFormat format = 2; +} /** * This interface contains file system master service endpoints for Alluxio clients. @@ -732,6 +774,15 @@ service FileSystemMasterClientService { rpc GetStateLockHolders(GetStateLockHoldersPRequest) returns (GetStateLockHoldersPResponse); rpc NeedsSync(NeedsSyncRequest) returns (NeedsSyncResponse); + + /** + * Load a directory into Alluxio. 
+ */ + rpc LoadPath(LoadPathPRequest) returns (LoadPathPResponse); + + rpc StopLoadPath(StopLoadPathPRequest) returns (StopLoadPathPResponse); + + rpc GetLoadProgress(GetLoadProgressPRequest) returns (GetLoadProgressPResponse); } message FileSystemHeartbeatPResponse { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index dcfc323d804a..d1ca68994440 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -1162,6 +1162,11 @@ "id": 3, "name": "bandwidth", "type": "int64" + }, + { + "id": 4, + "name": "user", + "type": "string" } ] }, @@ -2038,6 +2043,19 @@ "integer": 3 } ] + }, + { + "name": "LoadProgressReportFormat", + "enum_fields": [ + { + "name": "TEXT", + "integer": 1 + }, + { + "name": "JSON", + "integer": 2 + } + ] } ], "messages": [ @@ -3408,6 +3426,11 @@ "id": 12, "name": "xattrUpdateStrategy", "type": "alluxio.proto.journal.XAttrUpdateStrategy" + }, + { + "id": 13, + "name": "directChildrenLoaded", + "type": "bool" } ], "maps": [ @@ -3695,6 +3718,116 @@ { "name": "NeedsSyncResponse" }, + { + "name": "LoadPathPOptions", + "fields": [ + { + "id": 1, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 2, + "name": "verify", + "type": "bool" + }, + { + "id": 3, + "name": "partialListing", + "type": "bool" + } + ] + }, + { + "name": "LoadPathPRequest", + "fields": [ + { + "id": 1, + "name": "path", + "type": "string" + }, + { + "id": 2, + "name": "options", + "type": "LoadPathPOptions" + } + ] + }, + { + "name": "LoadPathPResponse", + "fields": [ + { + "id": 1, + "name": "newLoadSubmitted", + "type": "bool" + } + ] + }, + { + "name": "StopLoadPathPRequest", + "fields": [ + { + "id": 1, + "name": "path", + "type": "string" + } + ] + }, + { + "name": "StopLoadPathPResponse", + "fields": [ + { + "id": 1, + "name": "existingLoadStopped", + "type": "bool" + } + ] + }, + { + "name": "LoadProgressPOptions", + "fields": [ + { + "id": 1, + "name": "format", + "type": 
"LoadProgressReportFormat" + }, + { + "id": 2, + "name": "verbose", + "type": "bool" + } + ] + }, + { + "name": "GetLoadProgressPRequest", + "fields": [ + { + "id": 1, + "name": "path", + "type": "string" + }, + { + "id": 2, + "name": "options", + "type": "LoadProgressPOptions" + } + ] + }, + { + "name": "GetLoadProgressPResponse", + "fields": [ + { + "id": 1, + "name": "progressReport", + "type": "string" + }, + { + "id": 2, + "name": "format", + "type": "LoadProgressReportFormat" + } + ] + }, { "name": "FileSystemHeartbeatPResponse", "fields": [ @@ -3962,6 +4095,21 @@ "name": "NeedsSync", "in_type": "NeedsSyncRequest", "out_type": "NeedsSyncResponse" + }, + { + "name": "LoadPath", + "in_type": "LoadPathPRequest", + "out_type": "LoadPathPResponse" + }, + { + "name": "StopLoadPath", + "in_type": "StopLoadPathPRequest", + "out_type": "StopLoadPathPResponse" + }, + { + "name": "GetLoadProgress", + "in_type": "GetLoadProgressPRequest", + "out_type": "GetLoadProgressPResponse" } ] }, @@ -9399,6 +9547,84 @@ } } }, + { + "protopath": "proto:/:journal:/:job.proto", + "def": { + "enums": [ + { + "name": "PJobState", + "enum_fields": [ + { + "name": "CREATED", + "integer": 1 + }, + { + "name": "STOPPED", + "integer": 2 + }, + { + "name": "SUCCEEDED", + "integer": 3 + }, + { + "name": "FAILED", + "integer": 4 + } + ] + } + ], + "messages": [ + { + "name": "LoadJobEntry", + "fields": [ + { + "id": 1, + "name": "load_path", + "type": "string" + }, + { + "id": 2, + "name": "state", + "type": "PJobState" + }, + { + "id": 3, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 4, + "name": "verify", + "type": "bool" + }, + { + "id": 5, + "name": "user", + "type": "string" + }, + { + "id": 6, + "name": "partialListing", + "type": "bool" + }, + { + "id": 7, + "name": "job_id", + "type": "string" + }, + { + "id": 8, + "name": "end_time", + "type": "int64" + } + ] + } + ], + "package": { + "name": "alluxio.proto.journal" + } + } + }, { "protopath": 
"proto:/:journal:/:journal.proto", "def": { @@ -9611,6 +9837,11 @@ "name": "update_inode_file", "type": "UpdateInodeFileEntry" }, + { + "id": 53, + "name": "load_job", + "type": "LoadJobEntry" + }, { "id": 39, "name": "journal_entries", @@ -9632,6 +9863,9 @@ }, { "path": "proto/journal/table.proto" + }, + { + "path": "proto/journal/job.proto" } ], "package": { diff --git a/core/transport/src/main/proto/proto/journal/job.proto b/core/transport/src/main/proto/proto/journal/job.proto new file mode 100644 index 000000000000..9496f3cc21a1 --- /dev/null +++ b/core/transport/src/main/proto/proto/journal/job.proto @@ -0,0 +1,23 @@ +syntax = "proto2"; + +package alluxio.proto.journal; + +// Journal entry messages for the block master. +enum PJobState { + CREATED = 1; + STOPPED = 2; + SUCCEEDED = 3; + FAILED = 4; +} + +// next available id: 8 +message LoadJobEntry { + required string load_path = 1; + required PJobState state = 2; + optional int64 bandwidth = 3; + required bool verify = 4; + optional string user = 5; + required bool partialListing = 6; + required string job_id = 7; + optional int64 end_time = 8; +} diff --git a/core/transport/src/main/proto/proto/journal/journal.proto b/core/transport/src/main/proto/proto/journal/journal.proto index 31ed4bcdda90..023f03879193 100644 --- a/core/transport/src/main/proto/proto/journal/journal.proto +++ b/core/transport/src/main/proto/proto/journal/journal.proto @@ -7,6 +7,7 @@ import "proto/journal/block.proto"; import "proto/journal/file.proto"; import "proto/journal/meta.proto"; import "proto/journal/table.proto"; +import "proto/journal/job.proto"; // Wraps around all types of Alluxio journal entries. // @@ -24,7 +25,7 @@ message JournalOpPId { optional int64 leastSignificantBits = 2; } -// next available id: 53 +// next available id: 54 message JournalEntry { // shared fields. 
optional int64 sequence_number = 1; @@ -66,6 +67,7 @@ message JournalEntry { optional UpdateInodeEntry update_inode = 35; optional UpdateInodeDirectoryEntry update_inode_directory = 36; optional UpdateInodeFileEntry update_inode_file = 37; + optional LoadJobEntry load_job = 53; // This journal entry is a list of other entries. when a journal entry // contains other journal entries, all other optional fields must be unset. diff --git a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java index 13c7c140d0d6..84cd171b1d31 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java +++ b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java @@ -296,6 +296,23 @@ public void needsSync(AlluxioURI path) { throw new UnsupportedOperationException(); } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean stopLoad(AlluxioURI path) { + throw new UnsupportedOperationException(); + } + + @Override + public String getLoadProgress(AlluxioURI path, + Optional format, boolean verbose) { + throw new UnsupportedOperationException(); + } + @Override public void close() throws IOException { throw new UnsupportedOperationException(); diff --git a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java index 80ae3d1c3c9c..8eadd3b7636f 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java +++ b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java @@ -201,6 +201,23 @@ public List getStateLockHolders() throws AlluxioStatusException { public void needsSync(AlluxioURI path) throws 
AlluxioStatusException { } + @Override + public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + return false; + } + + @Override + public boolean stopLoad(AlluxioURI path) { + return false; + } + + @Override + public String getLoadProgress(AlluxioURI path, + java.util.Optional format, boolean verbose) { + return null; + } + @Override public void connect() throws IOException { } diff --git a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java index ee99faa497a8..3b068bf40c9a 100644 --- a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java +++ b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java @@ -90,7 +90,8 @@ void init() throws Exception { ThreadFactoryUtils.build("DefaultFileSystemMasterTest-%d", true)); mFsMaster = new DefaultFileSystemMaster(blockMaster, masterContext, ExecutorServiceFactories.constantExecutorServiceFactory(service), Clock.systemUTC()); - mFsMasterServer = new FileSystemMasterClientServiceHandler(mFsMaster); + mFsMasterServer = + new FileSystemMasterClientServiceHandler(mFsMaster, mFsMaster.getLoadManager()); mGetStatusObserver = createStreamObserver(); mRegistry.add(FileSystemMaster.class, mFsMaster); diff --git a/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java b/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java index c7d455cfb71a..96f6c0b9ffc5 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java @@ -27,21 +27,27 @@ import alluxio.exception.AlluxioException; import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.CacheRequest; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.OpenFilePOptions; import alluxio.proto.dataserver.Protocol; import alluxio.resource.CloseableResource; import 
alluxio.util.FileSystemOptionsUtils; +import alluxio.util.FormatUtils; import alluxio.wire.BlockInfo; import alluxio.wire.WorkerNetAddress; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import java.io.IOException; import java.util.List; +import java.util.Optional; +import java.util.OptionalLong; import javax.annotation.concurrent.ThreadSafe; /** @@ -57,6 +63,65 @@ public final class LoadCommand extends AbstractFileSystemCommand { .hasArg(false) .desc("load the file to local worker.") .build(); + private static final Option SUBMIT_OPTION = Option.builder() + .longOpt("submit") + .required(false) + .hasArg(false) + .desc("Submit load job to Alluxio master, update job options if already exists.") + .build(); + + private static final Option STOP_OPTION = Option.builder() + .longOpt("stop") + .required(false) + .hasArg(false) + .desc("Stop a load job if it's still running.") + .build(); + + private static final Option PROGRESS_OPTION = Option.builder() + .longOpt("progress") + .required(false) + .hasArg(false) + .desc("Get progress report of a load job.") + .build(); + + private static final Option PARTIAL_LISTING_OPTION = Option.builder() + .longOpt("partial-listing") + .required(false) + .hasArg(false) + .desc("Use partial directory listing. This limits the memory usage " + + "and starts load sooner for larger directory. 
But progress " + + "report cannot report on the total number of files because the " + + "whole directory is not listed yet.") + .build(); + + private static final Option VERIFY_OPTION = Option.builder() + .longOpt("verify") + .required(false) + .hasArg(false) + .desc("Run verification when load finish and load new files if any.") + .build(); + + private static final Option BANDWIDTH_OPTION = Option.builder() + .longOpt("bandwidth") + .required(false) + .hasArg(true) + .desc("Run verification when load finish and load new files if any.") + .build(); + + private static final Option PROGRESS_FORMAT = Option.builder() + .longOpt("format") + .required(false) + .hasArg(true) + .desc("Format of the progress report, supports TEXT and JSON. If not " + + "set, TEXT is used.") + .build(); + + private static final Option PROGRESS_VERBOSE = Option.builder() + .longOpt("verbose") + .required(false) + .hasArg(false) + .desc("Whether to return a verbose progress report with detailed errors") + .build(); /** * Constructs a new instance to load a file or directory in Alluxio space. 
@@ -74,37 +139,173 @@ public String getCommandName() { @Override public Options getOptions() { - return new Options().addOption(LOCAL_OPTION); - } - - @Override - protected void runPlainPath(AlluxioURI plainPath, CommandLine cl) - throws AlluxioException, IOException { - load(plainPath, cl.hasOption(LOCAL_OPTION.getLongOpt())); + return new Options() + .addOption(BANDWIDTH_OPTION) + .addOption(PARTIAL_LISTING_OPTION) + .addOption(VERIFY_OPTION) + .addOption(SUBMIT_OPTION) + .addOption(STOP_OPTION) + .addOption(PROGRESS_OPTION) + .addOption(PROGRESS_FORMAT) + .addOption(PROGRESS_VERBOSE) + .addOption(LOCAL_OPTION); } @Override public int run(CommandLine cl) throws AlluxioException, IOException { String[] args = cl.getArgs(); AlluxioURI path = new AlluxioURI(args[0]); - runWildCardCmd(path, cl); + if (isOldFormat(cl)) { + runWildCardCmd(path, cl); + return 0; + } + + if (path.containsWildcard()) { + throw new UnsupportedOperationException("Load does not support wildcard path"); + } + + if (cl.hasOption(SUBMIT_OPTION.getLongOpt())) { + OptionalLong bandwidth = OptionalLong.empty(); + if (cl.hasOption(BANDWIDTH_OPTION.getLongOpt())) { + bandwidth = OptionalLong.of(FormatUtils.parseSpaceSize( + cl.getOptionValue(BANDWIDTH_OPTION.getLongOpt()))); + } + return submitLoad( + path, + bandwidth, + cl.hasOption(PARTIAL_LISTING_OPTION.getLongOpt()), + cl.hasOption(VERIFY_OPTION.getLongOpt())); + } + + if (cl.hasOption(STOP_OPTION.getLongOpt())) { + return stopLoad(path); + } + + if (cl.hasOption(PROGRESS_OPTION.getLongOpt())) { + Optional format = Optional.empty(); + if (cl.hasOption(PROGRESS_FORMAT.getLongOpt())) { + format = Optional.of(LoadProgressReportFormat.valueOf( + cl.getOptionValue(PROGRESS_FORMAT.getLongOpt()))); + } + return getProgress(path, format, cl.hasOption(PROGRESS_VERBOSE.getLongOpt())); + } + return 0; } + @Override + public String getUsage() { + return "For backward compatibility: load [--local] \n" + + "For distributed load:\n" + + "\tload --submit 
[--bandwidth N] [--verify] [--partial-listing]\n" + + "\tload --stop\n" + + "\tload --progress [--format TEXT|JSON] [--verbose]\n"; + } + + @Override + public String getDescription() { + return "Loads a file or directory in Alluxio space, makes it resident in Alluxio."; + } + + @Override + public void validateArgs(CommandLine cl) throws InvalidArgumentException { + CommandUtils.checkNumOfArgsNoLessThan(this, cl, 1); + if (!isOldFormat(cl)) { + int commands = 0; + if (cl.hasOption(SUBMIT_OPTION.getLongOpt())) { + commands++; + } + if (cl.hasOption(STOP_OPTION.getLongOpt())) { + commands++; + } + if (cl.hasOption(PROGRESS_OPTION.getLongOpt())) { + commands++; + } + if (commands != 1) { + throw new InvalidArgumentException("Must have one of submit / stop / progress"); + } + } + } + + private int submitLoad(AlluxioURI path, OptionalLong bandwidth, + boolean usePartialListing, boolean verify) { + try { + if (mFileSystem.submitLoad(path, bandwidth, usePartialListing, verify)) { + System.out.printf("Load '%s' is successfully submitted.%n", path); + } else { + System.out.printf("Load already running for path '%s', updated the job with " + + "new bandwidth: %s, verify: %s%n", + path, + bandwidth.isPresent() ? 
String.valueOf(bandwidth.getAsLong()) : "unlimited", + verify); + } + return 0; + } catch (StatusRuntimeException e) { + System.out.println("Failed to submit load job " + path + ": " + e.getMessage()); + return -1; + } + } + + private int stopLoad(AlluxioURI path) { + try { + if (mFileSystem.stopLoad(path)) { + System.out.printf("Load '%s' is successfully stopped.%n", path); + } + else { + System.out.printf("Cannot find load job for path %s, it might have already been " + + "stopped or finished%n", path); + } + return 0; + } catch (StatusRuntimeException e) { + System.out.println("Failed to stop load job " + path + ": " + e.getMessage()); + return -1; + } + } + + private int getProgress(AlluxioURI path, Optional format, + boolean verbose) { + try { + System.out.println("Progress for loading path '" + path + "':"); + System.out.println(mFileSystem.getLoadProgress(path, format, verbose)); + return 0; + } catch (StatusRuntimeException e) { + if (e.getStatus().getCode() == Status.Code.NOT_FOUND) { + System.out.println("Load for path '" + path + "' cannot be found."); + return -2; + } + System.out.println("Failed to get progress for load job " + path + ": " + e.getMessage()); + return -1; + } + } + + private boolean isOldFormat(CommandLine cl) { + return cl.getOptions().length == 0 + || (cl.getOptions().length == 1 && cl.hasOption(LOCAL_OPTION.getLongOpt())); + } + + @Override + protected void runPlainPath(AlluxioURI plainPath, CommandLine cl) + throws AlluxioException, IOException { + Preconditions.checkState( + isOldFormat(cl), + "The new load command should not hit this code path"); + oldLoad(plainPath, cl.hasOption(LOCAL_OPTION.getLongOpt())); + } + /** * Loads a file or directory in Alluxio space, makes it resident in Alluxio. 
* * @param filePath The {@link AlluxioURI} path to load into Alluxio * @param local whether to load data to local worker even when the data is already loaded remotely */ - private void load(AlluxioURI filePath, boolean local) + private void oldLoad(AlluxioURI filePath, boolean local) throws AlluxioException, IOException { URIStatus status = mFileSystem.getStatus(filePath); if (status.isFolder()) { List statuses = mFileSystem.listStatus(filePath); for (URIStatus uriStatus : statuses) { AlluxioURI newPath = new AlluxioURI(uriStatus.getPath()); - load(newPath, local); + oldLoad(newPath, local); } } else { if (local) { @@ -144,49 +345,24 @@ private void runLoadTask(AlluxioURI filePath, URIStatus status, boolean local) } Protocol.OpenUfsBlockOptions openUfsBlockOptions = new InStreamOptions(status, options, conf, mFsContext).getOpenUfsBlockOptions(blockId); - if (openUfsBlockOptions.getNoCache()) { - // ignore "NO_CACHE" setting for "load" - openUfsBlockOptions = Protocol.OpenUfsBlockOptions.newBuilder(openUfsBlockOptions) - .setNoCache(false).build(); + BlockInfo info = status.getBlockInfo(blockId); + long blockLength = info.getLength(); + String host = dataSource.getHost(); + // issues#11172: If the worker is in a container, use the container hostname + // to establish the connection. 
+ if (!dataSource.getContainerHost().equals("")) { + host = dataSource.getContainerHost(); + } + CacheRequest request = CacheRequest.newBuilder().setBlockId(blockId).setLength(blockLength) + .setOpenUfsBlockOptions(openUfsBlockOptions).setSourceHost(host) + .setSourcePort(dataSource.getDataPort()).build(); + try (CloseableResource blockWorker = + mFsContext.acquireBlockWorkerClient(dataSource)) { + blockWorker.get().cache(request); + } catch (Exception e) { + throw new RuntimeException(String.format("Failed to complete cache request from %s for " + + "block %d of file %s: %s", dataSource, blockId, status.getPath(), e), e); } - cacheBlock(blockId, dataSource, status, openUfsBlockOptions); - } - } - - @Override - public String getUsage() { - return "load [--local] "; - } - - @Override - public String getDescription() { - return "Loads a file or directory in Alluxio space, makes it resident in Alluxio."; - } - - @Override - public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsNoLessThan(this, cl, 1); - } - - private void cacheBlock(long blockId, WorkerNetAddress dataSource, URIStatus status, - Protocol.OpenUfsBlockOptions options) { - BlockInfo info = status.getBlockInfo(blockId); - long blockLength = info.getLength(); - String host = dataSource.getHost(); - // issues#11172: If the worker is in a container, use the container hostname - // to establish the connection. 
- if (!dataSource.getContainerHost().equals("")) { - host = dataSource.getContainerHost(); - } - CacheRequest request = CacheRequest.newBuilder().setBlockId(blockId).setLength(blockLength) - .setOpenUfsBlockOptions(options).setSourceHost(host) - .setSourcePort(dataSource.getDataPort()).build(); - try (CloseableResource blockWorker = - mFsContext.acquireBlockWorkerClient(dataSource)) { - blockWorker.get().cache(request); - } catch (Exception e) { - throw new RuntimeException(String.format("Failed to complete cache request from %s for " - + "block %d of file %s: %s", dataSource, blockId, status.getPath(), e), e); } } } diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java index 0b841141f276..77a1ec45824c 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java @@ -11,90 +11,159 @@ package alluxio.client.cli.fs.command; -import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import alluxio.AlluxioURI; +import alluxio.Constants; import alluxio.client.cli.fs.AbstractFileSystemShellTest; import alluxio.client.file.FileSystemTestUtils; import alluxio.client.file.FileSystemUtils; -import alluxio.client.file.URIStatus; +import alluxio.conf.PropertyKey; import alluxio.grpc.WritePType; +import alluxio.testutils.LocalAlluxioClusterResource; -import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class LoadCommandIntegrationTest extends AbstractFileSystemShellTest { + @Rule + public TemporaryFolder mTempFolder = new TemporaryFolder(); + @ClassRule + public static LocalAlluxioClusterResource sResource = + new LocalAlluxioClusterResource.Builder() + 
.setNumWorkers(1) + .setProperty(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT, "16MB") + .setProperty(PropertyKey.WORKER_TIERED_STORE_LEVELS, 1) + .setProperty(PropertyKey.WORKER_TIERED_STORE_LEVEL0_ALIAS, Constants.MEDIUM_HDD) + .setProperty(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_MEDIUMTYPE, Constants.MEDIUM_HDD) + .setProperty(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_QUOTA, "2GB") + .build(); -/** - * Tests for load command. - */ -public final class LoadCommandIntegrationTest extends AbstractFileSystemShellTest { @Test - public void loadDir() throws Exception { + public void testCommand() throws Exception { FileSystemTestUtils.createByteFile(sFileSystem, "/testRoot/testFileA", WritePType.THROUGH, - 10); - FileSystemTestUtils - .createByteFile(sFileSystem, "/testRoot/testFileB", WritePType.MUST_CACHE, 10); + Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRoot/testFileB", WritePType.THROUGH, + Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRoot/testDirectory/testFileC", + WritePType.THROUGH, Constants.MB); AlluxioURI uriA = new AlluxioURI("/testRoot/testFileA"); AlluxioURI uriB = new AlluxioURI("/testRoot/testFileB"); - - URIStatus statusA = sFileSystem.getStatus(uriA); - URIStatus statusB = sFileSystem.getStatus(uriB); - assertFalse(statusA.getInAlluxioPercentage() == 100); - assertTrue(statusB.getInAlluxioPercentage() == 100); + AlluxioURI uriC = new AlluxioURI("/testRoot/testDirectory/testFileC"); + assertEquals(0, sFileSystem.getStatus(uriA).getInAlluxioPercentage()); + assertEquals(0, sFileSystem.getStatus(uriB).getInAlluxioPercentage()); + assertEquals(0, sFileSystem.getStatus(uriC).getInAlluxioPercentage()); // Testing loading of a directory - sFsShell.run("load", "/testRoot"); + sFsShell.run("loadMetadata", "/testRoot"); + assertEquals(0, sFsShell.run("load", "/testRoot", "--submit", "--verify")); + assertEquals(0, sFsShell.run("load", "/testRoot", "--progress")); 
FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriA, 100); FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriB, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriC, 100); + while (!mOutput.toString().contains("SUCCEEDED")) { + assertEquals(0, sFsShell.run("load", "/testRoot", "--progress")); + Thread.sleep(1000); + } + assertTrue(mOutput.toString().contains("Files Processed: 3 out of 3")); + assertTrue(mOutput.toString().contains("Bytes Loaded: 3072.00KB out of 3072.00KB")); + assertTrue(mOutput.toString().contains("Files Failed: 0")); + assertEquals(0, sFsShell.run("load", "/testRoot", "--stop")); + assertEquals(-2, sFsShell.run("load", "/testRootNotExists", "--progress")); + assertTrue(mOutput.toString().contains("Load for path '/testRootNotExists' cannot be found.")); + sFsShell.run("load", "/testRoot", "--progress", "--format", "JSON"); + assertTrue(mOutput.toString().contains("\"mJobState\":\"SUCCEEDED\"")); + sFsShell.run("load", "/testRoot", "--progress", "--format", "JSON", "--verbose"); + assertTrue(mOutput.toString().contains("\"mVerbose\":true")); } @Test - public void loadFile() throws Exception { - FileSystemTestUtils.createByteFile(sFileSystem, "/testFile", WritePType.THROUGH, 10); - AlluxioURI uri = new AlluxioURI("/testFile"); - URIStatus status = sFileSystem.getStatus(uri); - assertFalse(status.getInAlluxioPercentage() == 100); - // Testing loading of a single file - sFsShell.run("load", "/testFile"); - FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + public void testPartialListing() throws Exception { + int numFiles = 500; + for (int i = 0; i < numFiles; i++) { + String fileName = "/testRoot/testFile" + i; + String fileName2 = "/testRoot/testDirectory/testFile" + i; + FileSystemTestUtils.createByteFile(sFileSystem, fileName, WritePType.THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, fileName2, WritePType.THROUGH, Constants.MB); + assertEquals(0, sFileSystem.getStatus(new 
AlluxioURI(fileName)).getInAlluxioPercentage()); + assertEquals(0, sFileSystem.getStatus(new AlluxioURI(fileName2)).getInAlluxioPercentage()); + } + + // Testing loading of a directory + sFsShell.run("loadMetadata", "/testRoot"); + assertEquals(0, sFsShell.run("load", "/testRoot", "--submit", "--partial-listing")); + assertEquals(0, sFsShell.run("load", "/testRoot", "--progress")); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, + new AlluxioURI("/testRoot/testDirectory/testFile" + (numFiles - 1)), 100); + while (!mOutput.toString().contains("SUCCEEDED")) { + assertEquals(0, sFsShell.run("load", "/testRoot", "--progress")); + Thread.sleep(1000); + } } @Test - public void loadFileWithLocalOption() throws Exception { - FileSystemTestUtils.createByteFile(sFileSystem, "/testFile", WritePType.CACHE_THROUGH, - 10); - AlluxioURI uri = new AlluxioURI("/testFile"); - URIStatus status = sFileSystem.getStatus(uri); - assertTrue(status.getInAlluxioPercentage() == 100); - // Testing loading a file has been loaded fully - sFsShell.run("load", "--local", "/testFile"); - Assert.assertEquals("/testFile" + " loaded" + "\n", mOutput.toString()); - // Testing "load --local" works when the file isn't already loaded - FileSystemTestUtils.createByteFile(sFileSystem, "/testFile2", WritePType.THROUGH, 10); - uri = new AlluxioURI("/testFile2"); - status = sFileSystem.getStatus(uri); - assertFalse(status.getInAlluxioPercentage() == 100); - sFsShell.run("load", "--local", "/testFile2"); - FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + public void testPartlyLoaded() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootPartial/testFileA", + WritePType.THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootPartial/testFileB", + WritePType.CACHE_THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootPartial/testDirectory/testFileC", + WritePType.CACHE_THROUGH, Constants.MB); + 
FileSystemTestUtils.createByteFile(sFileSystem, "/testRootPartial/testDirectory/testFileD", + WritePType.THROUGH, 100 * Constants.MB); + AlluxioURI uriA = new AlluxioURI("/testRootPartial/testFileA"); + AlluxioURI uriB = new AlluxioURI("/testRootPartial/testFileB"); + AlluxioURI uriC = new AlluxioURI("/testRootPartial/testDirectory/testFileC"); + AlluxioURI uriD = new AlluxioURI("/testRootPartial/testDirectory/testFileD"); + assertEquals(0, sFileSystem.getStatus(uriA).getInAlluxioPercentage()); + assertEquals(100, sFileSystem.getStatus(uriB).getInAlluxioPercentage()); + assertEquals(100, sFileSystem.getStatus(uriC).getInAlluxioPercentage()); + assertEquals(0, sFileSystem.getStatus(uriD).getInAlluxioPercentage()); + // Testing loading of a directory + sFsShell.run("loadMetadata", "/testRootLoaded"); + assertEquals(0, sFsShell.run("load", "/testRootPartial", "--submit")); + assertEquals(0, sFsShell.run("load", "/testRootPartial", "--progress")); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriA, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriB, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriC, 100); + while (!mOutput.toString().contains("SUCCEEDED")) { + assertEquals(0, sFsShell.run("load", "/testRootPartial", "--progress")); + Thread.sleep(1000); + } } @Test - public void loadFileWithWildcard() throws Exception { - FileSystemTestUtils.createByteFile(sFileSystem, "/testDir1/testFile1", WritePType.THROUGH, - 10); - FileSystemTestUtils.createByteFile(sFileSystem, "/testDir2/testFile2", WritePType.THROUGH, - 10); - AlluxioURI uri = new AlluxioURI("/testDir1/testFile1"); - URIStatus status = sFileSystem.getStatus(uri); - assertFalse(status.getInAlluxioPercentage() == 100); - uri = new AlluxioURI("/testDir2/testFile2"); - status = sFileSystem.getStatus(uri); - assertFalse(status.getInAlluxioPercentage() == 100); - - // Testing loading with wild card - sFsShell.run("load", "/*/testFile*"); - uri = new 
AlluxioURI("/testDir1/testFile1"); - FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); - uri = new AlluxioURI("/testDir2/testFile2"); - FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + public void testAlreadyLoaded() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootLoaded/testFileA", + WritePType.CACHE_THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootLoaded/testFileB", + WritePType.CACHE_THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootLoaded/testDirectory/testFileC", + WritePType.CACHE_THROUGH, Constants.MB); + FileSystemTestUtils.createByteFile(sFileSystem, "/testRootLoaded/testDirectory/testFileD", + WritePType.CACHE_THROUGH, 100 * Constants.MB); + AlluxioURI uriA = new AlluxioURI("/testRootLoaded/testFileA"); + AlluxioURI uriB = new AlluxioURI("/testRootLoaded/testFileB"); + AlluxioURI uriC = new AlluxioURI("/testRootLoaded/testDirectory/testFileC"); + AlluxioURI uriD = new AlluxioURI("/testRootLoaded/testDirectory/testFileD"); + assertEquals(100, sFileSystem.getStatus(uriA).getInAlluxioPercentage()); + assertEquals(100, sFileSystem.getStatus(uriB).getInAlluxioPercentage()); + assertEquals(100, sFileSystem.getStatus(uriC).getInAlluxioPercentage()); + assertEquals(100, sFileSystem.getStatus(uriD).getInAlluxioPercentage()); + // Testing loading of a directory + sFsShell.run("loadMetadata", "/testRootLoaded"); + assertEquals(0, sFsShell.run("load", "/testRootLoaded", "--submit")); + assertEquals(0, sFsShell.run("load", "/testRootLoaded", "--progress")); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriA, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriB, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriC, 100); + while (!mOutput.toString().contains("SUCCEEDED")) { + assertEquals(0, sFsShell.run("load", "/testRootLoaded", "--progress")); + Thread.sleep(1000); + } + 
assertTrue(mOutput.toString().contains("Files Processed: 0 out of 0")); + assertTrue(mOutput.toString().contains("Bytes Loaded: 0B out of 0B")); } } diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/OldLoadCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/OldLoadCommandIntegrationTest.java new file mode 100644 index 000000000000..c7dcadb3ad79 --- /dev/null +++ b/tests/src/test/java/alluxio/client/cli/fs/command/OldLoadCommandIntegrationTest.java @@ -0,0 +1,100 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.cli.fs.command; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.cli.fs.AbstractFileSystemShellTest; +import alluxio.client.file.FileSystemTestUtils; +import alluxio.client.file.FileSystemUtils; +import alluxio.client.file.URIStatus; +import alluxio.grpc.WritePType; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests for load command. 
+ */ +public final class OldLoadCommandIntegrationTest extends AbstractFileSystemShellTest { + @Test + public void loadDir() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testRoot/testFileA", WritePType.THROUGH, + 10); + FileSystemTestUtils + .createByteFile(sFileSystem, "/testRoot/testFileB", WritePType.MUST_CACHE, 10); + AlluxioURI uriA = new AlluxioURI("/testRoot/testFileA"); + AlluxioURI uriB = new AlluxioURI("/testRoot/testFileB"); + + URIStatus statusA = sFileSystem.getStatus(uriA); + URIStatus statusB = sFileSystem.getStatus(uriB); + assertFalse(statusA.getInAlluxioPercentage() == 100); + assertTrue(statusB.getInAlluxioPercentage() == 100); + // Testing loading of a directory + sFsShell.run("load", "/testRoot"); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriA, 100); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uriB, 100); + } + + @Test + public void loadFile() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testFile", WritePType.THROUGH, 10); + AlluxioURI uri = new AlluxioURI("/testFile"); + URIStatus status = sFileSystem.getStatus(uri); + assertFalse(status.getInAlluxioPercentage() == 100); + // Testing loading of a single file + sFsShell.run("load", "/testFile"); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + } + + @Test + public void loadFileWithLocalOption() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testFile", WritePType.CACHE_THROUGH, + 10); + AlluxioURI uri = new AlluxioURI("/testFile"); + URIStatus status = sFileSystem.getStatus(uri); + assertTrue(status.getInAlluxioPercentage() == 100); + // Testing loading a file has been loaded fully + sFsShell.run("load", "--local", "/testFile"); + Assert.assertEquals("/testFile" + " loaded" + "\n", mOutput.toString()); + // Testing "load --local" works when the file isn't already loaded + FileSystemTestUtils.createByteFile(sFileSystem, "/testFile2", WritePType.THROUGH, 10); + uri = new 
AlluxioURI("/testFile2"); + status = sFileSystem.getStatus(uri); + assertFalse(status.getInAlluxioPercentage() == 100); + sFsShell.run("load", "--local", "/testFile2"); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + } + + @Test + public void loadFileWithWildcard() throws Exception { + FileSystemTestUtils.createByteFile(sFileSystem, "/testDir1/testFile1", WritePType.THROUGH, + 10); + FileSystemTestUtils.createByteFile(sFileSystem, "/testDir2/testFile2", WritePType.THROUGH, + 10); + AlluxioURI uri = new AlluxioURI("/testDir1/testFile1"); + URIStatus status = sFileSystem.getStatus(uri); + assertFalse(status.getInAlluxioPercentage() == 100); + uri = new AlluxioURI("/testDir2/testFile2"); + status = sFileSystem.getStatus(uri); + assertFalse(status.getInAlluxioPercentage() == 100); + + // Testing loading with wild card + sFsShell.run("load", "/*/testFile*"); + uri = new AlluxioURI("/testDir1/testFile1"); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + uri = new AlluxioURI("/testDir2/testFile2"); + FileSystemUtils.waitForAlluxioPercentage(sFileSystem, uri, 100); + } +} From c3dcda455e081ef99345f2d906e7899652b3a2e6 Mon Sep 17 00:00:00 2001 From: jja725 Date: Wed, 11 Jan 2023 18:23:06 -0800 Subject: [PATCH 057/334] [DOCFIX] Add doc for load v2 ### What changes are proposed in this pull request? Add doc for load v2 ### Why are the changes needed? na ### Does this PR introduce any user facing changes? na pr-link: Alluxio/alluxio#16753 change-id: cid-ed6cbb585b3726771099c6d794be0ed2734b71c1 --- docs/en/operation/User-CLI.md | 44 +++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index 0845ae874830..fb0572ce205c 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -1101,18 +1101,52 @@ $ ./bin/alluxio fs leader ### load The `load` command moves data from the under storage system into Alluxio storage. 
+For example, `load` can be used to prefetch data for analytics jobs.
+If `load` is run on a directory, files in the directory will be recursively loaded.
+```console
+$ ./bin/alluxio fs load <path> --submit [--bandwidth N] [--verify] [--partial-listing]
+```
+Options:
+* The `--bandwidth` option specifies how much ufs bandwidth we want to use to load files.
+* The `--verify` option specifies whether we want to verify that all the files are loaded.
+* The `--partial-listing` option specifies using the batch listStatus API or the traditional listStatus. We would retire this option when the batch listStatus API matures.
+
+After submitting the command, you can check the status by running the following:
+```console
+$ ./bin/alluxio fs load <path> --progress [--format TEXT|JSON] [--verbose]
+```
+And you would get the following output:
+```console
+Progress for loading path '/dir-99':
+  Settings: bandwidth: unlimited verify: false
+  Job State: SUCCEEDED
+  Files Processed: 1000
+  Bytes Loaded: 125.00MB
+  Throughput: 2509.80KB/s
+  Block load failure rate: 0.00%
+  Files Failed: 0
+```
+Options:
+* The `--format` option specifies the output format. TEXT is used by default.
+* The `--verbose` option outputs job details.
+
+To stop the command, run the following:
+```console
+$ ./bin/alluxio fs load <path> --stop
+```
+
+If you just want sequential execution for a couple of files, you can use the following old version:
+```console
+$ ./bin/alluxio fs load <path>
+```
 If there is a Alluxio worker on the machine this command is run from, the data will be loaded to
 that worker.
 Otherwise, a random worker will be selected to serve the data.
 If the data is already loaded into Alluxio, load is a no-op unless the `--local flag` is used.
 The `--local` flag forces the data to be loaded to a local worker even if the data is already
 available on a remote worker.
 
-If `load` is run on a directory, files in the directory will be recursively loaded.
-
-For example, `load` can be used to prefetch data for analytics jobs.
- ```console -$ ./bin/alluxio fs load /data/today +$ ./bin/alluxio fs load --local ``` ### location From ace87586af4a26cea2e1640407e9e2b257aa52e1 Mon Sep 17 00:00:00 2001 From: xpbob <609083568@qq.com> Date: Thu, 12 Jan 2023 21:51:25 +0800 Subject: [PATCH 058/334] Fix typo issue of FreeWorkerCommand ### What changes are proposed in this pull request? Formatted output ### Why are the changes needed? Better output readability ### Does this PR introduce any user facing changes? NA. pr-link: Alluxio/alluxio#16748 change-id: cid-bd8203263adba719ff0507a364e1d9e4701bbe42 --- .../src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java index 457c74af8ba6..a1502e898dec 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java @@ -81,7 +81,7 @@ public int run(CommandLine cl) throws AlluxioException, IOException { mFsContext.acquireBlockMasterClientResource()) { blockMasterClient.get().removeDecommissionedWorker(workerName); } catch (NotFoundException notFoundException) { - System.out.println("Worker" + workerName + " is not found in decommissioned worker set."); + System.out.println("Worker " + workerName + " is not found in decommissioned worker set."); return -1; } From 79f43ef82036d90077c1c502e86edfd780255b9f Mon Sep 17 00:00:00 2001 From: LingBin Date: Thu, 12 Jan 2023 23:22:29 +0800 Subject: [PATCH 059/334] [SMALLFIX] Remove duplicate function call ### What changes are proposed in this pull request? Remove duplicate `getPort()` call. ### Why are the changes needed? The result of `getPort()` has been recorded and can be used directly without calling this function again. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#15981 change-id: cid-5c680291f7cc487a8f31bd4d42b902b7bf25ebc6 --- .../src/main/java/alluxio/util/network/NetworkAddressUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java b/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java index 704ffb5100bc..05fca8148ac6 100644 --- a/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java +++ b/core/common/src/main/java/alluxio/util/network/NetworkAddressUtils.java @@ -370,7 +370,7 @@ public static InetSocketAddress getBindAddress(ServiceAttributeProvider service, AlluxioConfiguration conf) { int port = getPort(service, conf); assertValidPort(port); - return new InetSocketAddress(getBindHost(service, conf), getPort(service, conf)); + return new InetSocketAddress(getBindHost(service, conf), port); } /** From 6b888731d8512ac6c9e60069a5e916b492166ede Mon Sep 17 00:00:00 2001 From: Shouwei Chen Date: Thu, 12 Jan 2023 17:05:09 -0800 Subject: [PATCH 060/334] Restore table command with deprecated status ### What changes are proposed in this pull request? Restore table command with deprecated status Revert of https://github.com/Alluxio/alluxio/pull/16348 ### Why are the changes needed? N/A ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16762 change-id: cid-39090bc9d628ed769b6494c2c294dcd618c4e61a --- bin/alluxio | 6 ++++++ core/common/src/main/java/alluxio/conf/PropertyKey.java | 9 --------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/bin/alluxio b/bin/alluxio index 0e2488219b85..280d4bbeb0df 100755 --- a/bin/alluxio +++ b/bin/alluxio @@ -21,6 +21,7 @@ function printUsage { echo -e " bootstrapConf \t Generate a config file if one doesn't exist" echo -e " fs \t Command line tool for interacting with the Alluxio filesystem." echo -e " fsadmin \t Command line tool for use by Alluxio filesystem admins." 
+ echo -e " table \t (deprecated) Command line tool for interacting with the Alluxio table service." echo -e " getConf [key] \t Look up a configuration key, or print all configuration." echo -e " job \t Command line tool for interacting with the job service." echo -e " logLevel \t Set or get log level of Alluxio servers." @@ -294,6 +295,11 @@ function main { CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} runJavaClass "$@" ;; + "table") + CLASS="alluxio.cli.table.TableShell" + CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} + runJavaClass "$@" + ;; "logLevel") CLASS="alluxio.cli.LogLevel" CLASSPATH=${ALLUXIO_CLIENT_CLASSPATH} diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index c050dcd5e961..98701f45c36e 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7191,7 +7191,6 @@ public String toString() { .setDescription("(Experimental) Enables the table service.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_CATALOG_PATH = stringBuilder(Name.TABLE_CATALOG_PATH) @@ -7199,7 +7198,6 @@ public String toString() { .setDescription("The Alluxio file path for the table catalog metadata.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_CATALOG_UDB_SYNC_TIMEOUT = durationBuilder(Name.TABLE_CATALOG_UDB_SYNC_TIMEOUT) @@ -7208,7 +7206,6 @@ public String toString() { + "takes longer than this timeout, the sync will be terminated.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE = intBuilder(Name.TABLE_JOURNAL_PARTITIONS_CHUNK_SIZE) @@ -7216,7 +7213,6 @@ public String toString() { .setDescription("The maximum table 
partitions number in a single journal entry.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL = durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL) @@ -7227,7 +7223,6 @@ public String toString() { + "locations after transformation.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME = durationBuilder(Name.TABLE_TRANSFORM_MANAGER_JOB_HISTORY_RETENTION_TIME) @@ -7236,7 +7231,6 @@ public String toString() { + "about finished transformation jobs before they are discarded.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MIN = intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MIN) @@ -7244,7 +7238,6 @@ public String toString() { .setDescription("The minimum capacity of the hive client pool per hive metastore") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_UDB_HIVE_CLIENTPOOL_MAX = intBuilder(Name.TABLE_UDB_HIVE_CLIENTPOOL_MAX) @@ -7252,7 +7245,6 @@ public String toString() { .setDescription("The maximum capacity of the hive client pool per hive metastore") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) - .setIsHidden(true) .build(); public static final PropertyKey TABLE_LOAD_DEFAULT_REPLICATION = intBuilder(Name.TABLE_LOAD_DEFAULT_REPLICATION) @@ -7260,7 +7252,6 @@ public String toString() { .setDescription("The default replication number of files under the SDS table after " + "load option.") .setScope(Scope.CLIENT) - .setIsHidden(true) .build(); public static final PropertyKey HADOOP_SECURITY_AUTHENTICATION = 
stringBuilder(Name.HADOOP_SECURITY_AUTHENTICATION) From 84f94cb10b1f711cd8f24bfc80aa94b4cfddd89a Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Mon, 16 Jan 2023 10:55:20 +0800 Subject: [PATCH 061/334] Improve worker register tests ### What changes are proposed in this pull request? 1. A few integration tests on worker registration were put under package `alluxio.client.fs`, move them to `alluxio.server.block` which is a more relevant path. 2. Added a few helper functions 3. Manipulated on a few variable scopes and whitespaces 4. Some minor improvements in BlockMaster ### Why are the changes needed? Improve the tests and utilities by a little ### Does this PR introduce any user facing changes? NA pr-link: Alluxio/alluxio#16759 change-id: cid-bda168d22c51ae5cce8218d8d5c9b982d21e0c3e --- .../master/block/DefaultBlockMaster.java | 11 +- .../master/AbstractLocalAlluxioCluster.java | 65 +++++++-- ...ckMasterRegisterStreamIntegrationTest.java | 126 ++++++++---------- ...ckWorkerRegisterStreamIntegrationTest.java | 44 +++--- .../block}/RegisterStreamTestUtils.java | 86 ++++++++---- 5 files changed, 202 insertions(+), 130 deletions(-) rename tests/src/test/java/alluxio/{client/fs => server/block}/BlockMasterRegisterStreamIntegrationTest.java (88%) rename tests/src/test/java/alluxio/{client/fs => server/block}/BlockWorkerRegisterStreamIntegrationTest.java (94%) rename tests/src/test/java/alluxio/{client/fs => server/block}/RegisterStreamTestUtils.java (58%) diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 8023449789b9..013bd6b688ed 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -1047,7 +1047,7 @@ private MasterWorkerInfo findUnregisteredWorker(long workerId) { * @param workerId the worker id to register */ @Nullable - 
private MasterWorkerInfo recordWorkerRegistration(long workerId) { + protected MasterWorkerInfo recordWorkerRegistration(long workerId) { for (IndexedSet workers: Arrays.asList(mTempWorkers, mLostWorkers, mDecommissionedWorkers)) { MasterWorkerInfo worker = workers.getFirstByField(ID_INDEX, workerId); @@ -1198,19 +1198,18 @@ public void workerRegisterStream(WorkerRegisterContext context, protected void workerRegisterStart(WorkerRegisterContext context, RegisterWorkerPRequest chunk) { + MasterWorkerInfo workerInfo = context.getWorkerInfo(); + Preconditions.checkState(workerInfo != null, + "No workerInfo metadata found in the WorkerRegisterContext!"); + final List storageTiers = chunk.getStorageTiersList(); final Map totalBytesOnTiers = chunk.getTotalBytesOnTiersMap(); final Map usedBytesOnTiers = chunk.getUsedBytesOnTiersMap(); final Map lostStorage = chunk.getLostStorageMap(); - final Map> currentBlocksOnLocation = BlockMasterWorkerServiceHandler.reconstructBlocksOnLocationMap( chunk.getCurrentBlocksList(), context.getWorkerId()); RegisterWorkerPOptions options = chunk.getOptions(); - - MasterWorkerInfo workerInfo = context.getWorkerInfo(); - Preconditions.checkState(workerInfo != null, - "No workerInfo metadata found in the WorkerRegisterContext!"); mActiveRegisterContexts.put(workerInfo.getId(), context); // The workerInfo is locked so we can operate on its blocks without race conditions diff --git a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java index 63811fed99b6..6299e8a51839 100644 --- a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java +++ b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java @@ -35,6 +35,7 @@ import alluxio.util.WaitForOptions; import alluxio.util.io.FileUtils; import alluxio.util.network.NetworkAddressUtils; +import alluxio.wire.WorkerNetAddress; import alluxio.worker.WorkerProcess; import 
org.slf4j.Logger; @@ -42,6 +43,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Random; @@ -179,6 +181,41 @@ public void startWorkers() throws Exception { } } + /** + * Restarts workers with the addresses provided, so that the workers can restart with + * static addresses to simulate a worker restart in the cluster. + * + * @param addresses worker addresses to use + */ + public void restartWorkers(Collection addresses) throws Exception { + // Start the worker one by one, so we avoid updating config while this worker is starting + for (WorkerNetAddress addr : addresses) { + Configuration.set(PropertyKey.WORKER_RPC_PORT, addr.getRpcPort()); + Configuration.set(PropertyKey.WORKER_WEB_PORT, addr.getWebPort()); + WorkerProcess worker = WorkerProcess.Factory.create(); + mWorkers.add(worker); + + Runnable runWorker = () -> { + try { + worker.start(); + } catch (InterruptedException e) { + // this is expected + } catch (Exception e) { + // Log the exception as the RuntimeException will be caught and handled silently by + // JUnit + LOG.error("Start worker error", e); + throw new RuntimeException(e + " \n Start Worker Error \n" + e.getMessage(), e); + } + }; + Thread thread = new Thread(runWorker); + thread.setName("WorkerThread-" + System.identityHashCode(thread)); + mWorkerThreads.add(thread); + thread.start(); + + TestUtils.waitForReady(worker); + } + } + /** * Sets up corresponding directories for tests. */ @@ -260,6 +297,22 @@ protected void stopProxy() throws Exception { * Stops the workers. 
*/ public void stopWorkers() throws Exception { + killWorkerProcesses(); + + // forget all the workers in the master + LocalAlluxioMaster master = getLocalAlluxioMaster(); + if (master != null) { + DefaultBlockMaster bm = + (DefaultBlockMaster) master.getMasterProcess().getMaster(BlockMaster.class); + bm.forgetAllWorkers(); + } + } + + /** + * Kills all worker processes without forgetting them in the master, + * so we can validate the master mechanism handling dead workers. + */ + public void killWorkerProcesses() throws Exception { if (mWorkers == null) { return; } @@ -274,14 +327,6 @@ public void stopWorkers() throws Exception { } } mWorkerThreads.clear(); - - // forget all the workers in the master - LocalAlluxioMaster master = getLocalAlluxioMaster(); - if (master != null) { - DefaultBlockMaster bm = - (DefaultBlockMaster) master.getMasterProcess().getMaster(BlockMaster.class); - bm.forgetAllWorkers(); - } } /** @@ -333,8 +378,8 @@ public ProxyProcess getProxyProcess() { public void waitForWorkersRegistered(int timeoutMs) throws TimeoutException, InterruptedException, IOException { try (MetaMasterClient client = - new RetryHandlingMetaMasterClient(MasterClientContext - .newBuilder(ClientContext.create(Configuration.global())).build())) { + new RetryHandlingMetaMasterClient(MasterClientContext + .newBuilder(ClientContext.create(Configuration.global())).build())) { CommonUtils.waitFor("workers registered", () -> { try { return client.getMasterInfo(Collections.emptySet()) diff --git a/tests/src/test/java/alluxio/client/fs/BlockMasterRegisterStreamIntegrationTest.java b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java similarity index 88% rename from tests/src/test/java/alluxio/client/fs/BlockMasterRegisterStreamIntegrationTest.java rename to tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java index caaf7ef343ac..c71502276819 100644 --- 
a/tests/src/test/java/alluxio/client/fs/BlockMasterRegisterStreamIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java @@ -9,19 +9,19 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.client.fs; - -import static alluxio.client.fs.RegisterStreamTestUtils.BATCH_SIZE; -import static alluxio.client.fs.RegisterStreamTestUtils.CAPACITY_MAP; -import static alluxio.client.fs.RegisterStreamTestUtils.EMPTY_CONFIG; -import static alluxio.client.fs.RegisterStreamTestUtils.LOST_STORAGE; -import static alluxio.client.fs.RegisterStreamTestUtils.MEM_CAPACITY_BYTES; -import static alluxio.client.fs.RegisterStreamTestUtils.NET_ADDRESS_1; -import static alluxio.client.fs.RegisterStreamTestUtils.TIER_BLOCK_TOTAL; -import static alluxio.client.fs.RegisterStreamTestUtils.TIER_CONFIG; -import static alluxio.client.fs.RegisterStreamTestUtils.USAGE_MAP; -import static alluxio.client.fs.RegisterStreamTestUtils.getErrorCapturingResponseObserver; -import static alluxio.client.fs.RegisterStreamTestUtils.parseTierConfig; +package alluxio.server.block; + +import static alluxio.server.block.RegisterStreamTestUtils.BATCH_SIZE; +import static alluxio.server.block.RegisterStreamTestUtils.CAPACITY_MAP; +import static alluxio.server.block.RegisterStreamTestUtils.EMPTY_CONFIG; +import static alluxio.server.block.RegisterStreamTestUtils.LOST_STORAGE; +import static alluxio.server.block.RegisterStreamTestUtils.MEM_CAPACITY_BYTES; +import static alluxio.server.block.RegisterStreamTestUtils.NET_ADDRESS_1; +import static alluxio.server.block.RegisterStreamTestUtils.TIER_BLOCK_TOTAL; +import static alluxio.server.block.RegisterStreamTestUtils.TIER_CONFIG; +import static alluxio.server.block.RegisterStreamTestUtils.USAGE_MAP; +import static alluxio.server.block.RegisterStreamTestUtils.getErrorCapturingResponseObserver; +import static 
alluxio.server.block.RegisterStreamTestUtils.parseTierConfig; import static alluxio.stress.cli.RpcBenchPreparationUtils.CAPACITY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThrows; @@ -147,8 +147,8 @@ public void registerEmptyWorkerStream() throws Exception { RegisterStreamTestUtils.generateRegisterStreamForEmptyWorker(workerId); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); // Verify the worker is registered assertEquals(0, errorQueue.size()); @@ -168,11 +168,11 @@ public void registerEmptyWorkerStream() throws Exception { public void registerWorkerStream() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); // Verify the worker is registered assertEquals(0, errorQueue.size()); @@ -192,11 +192,11 @@ public void registerLostWorker() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); // The worker registers to the master List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - 
RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); // Verify the worker has been registered assertEquals(0, errorQueue.size()); assertEquals(1, mBlockMaster.getWorkerCount()); @@ -214,8 +214,8 @@ public void registerLostWorker() throws Exception { // Register again Queue newErrorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(newErrorQueue)); // Verify the worker is registered again assertEquals(0, errorQueue.size()); @@ -234,19 +234,19 @@ public void registerLostWorker() throws Exception { public void registerExistingWorker() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); assertEquals(0, errorQueue.size()); // Verify the worker has registered assertEquals(1, mBlockMaster.getWorkerCount()); // Register again Queue newErrorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(newErrorQueue)); assertEquals(0, newErrorQueue.size()); // Verify the worker is registered @@ -264,11 +264,11 @@ public void 
registerExistingWorkerBlocksLost() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); // Register the worker for the 1st time List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); assertEquals(0, errorQueue.size()); // Verify the worker has registered assertEquals(1, mBlockMaster.getWorkerCount()); @@ -291,8 +291,8 @@ public void registerExistingWorkerBlocksLost() throws Exception { // Register again with the updated stream Queue newErrorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(newRequestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, newRequestChunks, + getErrorCapturingResponseObserver(newErrorQueue)); assertEquals(0, newErrorQueue.size()); // Verify the worker is registered @@ -317,11 +317,11 @@ public void registerExistingWorkerBlocksAdded() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); // Register the worker for the 1st time List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); assertEquals(0, errorQueue.size()); // Verify the worker has registered 
assertEquals(1, mBlockMaster.getWorkerCount()); @@ -349,8 +349,8 @@ public void registerExistingWorkerBlocksAdded() throws Exception { // Register again with the new request stream Queue newErrorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(newRequestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, newRequestChunks, + getErrorCapturingResponseObserver(newErrorQueue)); assertEquals(0, newErrorQueue.size()); // Verify the worker is registered @@ -373,7 +373,7 @@ public void registerExistingWorkerBlocksAdded() throws Exception { public void hangingWorkerSessionRecycled() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); @@ -414,13 +414,13 @@ public void hangingWorkerSessionRecycled() throws Exception { public void workerSendsErrorOnStart() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); StreamObserver requestObserver = mHandler.registerWorkerStream( - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + getErrorCapturingResponseObserver(errorQueue)); // Instead of sending requests to the master, the worker is interrupted // around the beginning of the stream. The error propagated to the master. 
@@ -447,13 +447,13 @@ public void workerSendsErrorOnStart() throws Exception { public void workerSendsErrorInStream() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); StreamObserver requestObserver = mHandler.registerWorkerStream( - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + getErrorCapturingResponseObserver(errorQueue)); // An error took place in the worker during the stream RegisterWorkerPRequest first = requestChunks.get(0); @@ -478,14 +478,14 @@ public void workerSendsErrorInStream() throws Exception { public void workerSendsErrorBeforeCompleting() throws Exception { long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); // Send the requests to the master Queue errorQueue = new ConcurrentLinkedQueue<>(); StreamObserver requestObserver = mHandler.registerWorkerStream( - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + getErrorCapturingResponseObserver(errorQueue)); for (RegisterWorkerPRequest chunk : requestChunks) { requestObserver.onNext(chunk); } @@ -520,7 +520,7 @@ public void workerRegisterStartThrowsError() throws Exception { // Generate requests List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); for (RegisterWorkerPRequest chunk : requestChunks) { streamOb.onNext(chunk); @@ -552,7 +552,7 @@ public void workerRegisterBatchThrowsError() throws Exception { // Generate 
requests List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); for (RegisterWorkerPRequest chunk : requestChunks) { streamOb.onNext(chunk); @@ -584,7 +584,7 @@ public void workerRegisterCompleteThrowsError() throws Exception { // Generate requests List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); for (RegisterWorkerPRequest chunk : requestChunks) { streamOb.onNext(chunk); @@ -662,11 +662,11 @@ public void reregisterWithDelete() throws Exception { // Register the worker so the worker is marked active in master long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); assertEquals(0, errorQueue.size()); assertEquals(1, mBlockMaster.getWorkerCount()); @@ -678,7 +678,7 @@ public void reregisterWithDelete() throws Exception { Queue newErrorQueue = new ConcurrentLinkedQueue<>(); Future f = mExecutorService.submit(() -> { sendStreamToMasterAndSignal(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue), latch); + getErrorCapturingResponseObserver(newErrorQueue), latch); }); // During the register stream, trigger a delete on worker @@ -715,11 +715,11 @@ public void reregisterWithFree() throws Exception { // Register the worker so the worker is marked active in 
master long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(workerId); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(workerId); prepareBlocksOnMaster(requestChunks); Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); assertEquals(0, errorQueue.size()); assertEquals(1, mBlockMaster.getWorkerCount()); @@ -731,7 +731,7 @@ public void reregisterWithFree() throws Exception { Queue newErrorQueue = new ConcurrentLinkedQueue<>(); mExecutorService.submit(() -> { sendStreamToMasterAndSignal(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(newErrorQueue), latch); + getErrorCapturingResponseObserver(newErrorQueue), latch); }); // During the register stream, trigger a delete on worker @@ -767,16 +767,6 @@ private void prepareBlocksOnMaster(List requestChunks) t } } - private void sendStreamToMaster(List requestChunks, - StreamObserver responseObserver) { - StreamObserver requestObserver = - mHandler.registerWorkerStream(responseObserver); - for (RegisterWorkerPRequest chunk : requestChunks) { - requestObserver.onNext(chunk); - } - requestObserver.onCompleted(); - } - private void sendStreamToMasterAndSignal( List requestChunks, StreamObserver responseObserver, @@ -851,8 +841,8 @@ private static List getTierAliases(Map> tierCon private void verifyWorkerCanReregister(long workerId, List requestChunks, int expectedBlockCount) throws Exception { Queue errorQueue = new ConcurrentLinkedQueue<>(); - sendStreamToMaster(requestChunks, - RegisterStreamTestUtils.getErrorCapturingResponseObserver(errorQueue)); + RegisterStreamTestUtils.sendStreamToMaster(mHandler, requestChunks, + getErrorCapturingResponseObserver(errorQueue)); 
assertEquals(errorQueue.toString(), 0, errorQueue.size()); MasterWorkerInfo worker = mBlockMaster.getWorker(workerId); diff --git a/tests/src/test/java/alluxio/client/fs/BlockWorkerRegisterStreamIntegrationTest.java b/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java similarity index 94% rename from tests/src/test/java/alluxio/client/fs/BlockWorkerRegisterStreamIntegrationTest.java rename to tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java index 4fa00a8d7acd..6964bdb2e146 100644 --- a/tests/src/test/java/alluxio/client/fs/BlockWorkerRegisterStreamIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java @@ -9,22 +9,22 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.client.fs; - -import static alluxio.client.fs.RegisterStreamTestUtils.BATCH_SIZE; -import static alluxio.client.fs.RegisterStreamTestUtils.CAPACITY_MAP; -import static alluxio.client.fs.RegisterStreamTestUtils.EMPTY_CONFIG; -import static alluxio.client.fs.RegisterStreamTestUtils.LOST_STORAGE; -import static alluxio.client.fs.RegisterStreamTestUtils.MEM_CAPACITY; -import static alluxio.client.fs.RegisterStreamTestUtils.MEM_USAGE_EMPTY; -import static alluxio.client.fs.RegisterStreamTestUtils.NET_ADDRESS_1; -import static alluxio.client.fs.RegisterStreamTestUtils.TIER_BLOCK_TOTAL; -import static alluxio.client.fs.RegisterStreamTestUtils.TIER_CONFIG; -import static alluxio.client.fs.RegisterStreamTestUtils.USAGE_MAP; -import static alluxio.client.fs.RegisterStreamTestUtils.findFirstBlock; -import static alluxio.client.fs.RegisterStreamTestUtils.getTierAliases; -import static alluxio.client.fs.RegisterStreamTestUtils.parseTierConfig; +package alluxio.server.block; + import static alluxio.grpc.BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub; +import static 
alluxio.server.block.RegisterStreamTestUtils.BATCH_SIZE; +import static alluxio.server.block.RegisterStreamTestUtils.CAPACITY_MAP; +import static alluxio.server.block.RegisterStreamTestUtils.EMPTY_CONFIG; +import static alluxio.server.block.RegisterStreamTestUtils.LOST_STORAGE; +import static alluxio.server.block.RegisterStreamTestUtils.MEM_CAPACITY; +import static alluxio.server.block.RegisterStreamTestUtils.MEM_USAGE_EMPTY; +import static alluxio.server.block.RegisterStreamTestUtils.NET_ADDRESS_1; +import static alluxio.server.block.RegisterStreamTestUtils.TIER_BLOCK_TOTAL; +import static alluxio.server.block.RegisterStreamTestUtils.TIER_CONFIG; +import static alluxio.server.block.RegisterStreamTestUtils.USAGE_MAP; +import static alluxio.server.block.RegisterStreamTestUtils.findFirstBlock; +import static alluxio.server.block.RegisterStreamTestUtils.getTierAliases; +import static alluxio.server.block.RegisterStreamTestUtils.parseTierConfig; import static alluxio.stress.cli.RpcBenchPreparationUtils.CAPACITY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; @@ -32,7 +32,6 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import alluxio.AlluxioTestDirectory; @@ -81,6 +80,7 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; +import org.mockito.Mockito; import org.powermock.api.mockito.PowerMockito; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; @@ -173,7 +173,7 @@ public void after() throws Exception { public void initBlockWorker() throws Exception { // Prepare a block worker - mBlockMasterClientPool = spy(new BlockMasterClientPool()); + mBlockMasterClientPool = Mockito.spy(new BlockMasterClientPool()); 
when(mBlockMasterClientPool.createNewResource()).thenReturn(mBlockMasterClient); when(mBlockMasterClientPool.acquire()).thenReturn(mBlockMasterClient); TieredBlockStore tieredBlockStore = new TieredBlockStore(); @@ -185,7 +185,7 @@ public void initBlockWorker() throws Exception { Sessions sessions = mock(Sessions.class); mBlockWorker = new DefaultBlockWorker(mBlockMasterClientPool, fileSystemMasterClient, - sessions, blockStore, workerId); + sessions, blockStore, workerId); } /** @@ -214,7 +214,7 @@ public void requestsForEmptyWorker() throws Exception { @Test public void requestsForWorker() throws Exception { List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(WORKER_ID); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(WORKER_ID); // Verify the size and content of the requests int expectedBatchCount = (int) Math.ceil((TIER_BLOCK_TOTAL) / (double) BATCH_SIZE); @@ -412,7 +412,7 @@ public void masterHangsOnCompleted() throws Exception { public void deleteDuringRegisterStream() throws Exception { // Generate a request stream of blocks List requestChunks = - RegisterStreamTestUtils.generateRegisterStreamForWorker(WORKER_ID); + RegisterStreamTestUtils.generateRegisterStreamForWorkerWithTiers(WORKER_ID); // Select a block to remove concurrent with the stream long blockToRemove = findFirstBlock(requestChunks); @@ -432,7 +432,7 @@ public void deleteDuringRegisterStream() throws Exception { // Prepare the block worker to use the overriden stream MasterClientContext context = MasterClientContext - .newBuilder(ClientContext.create(Configuration.global())).build(); + .newBuilder(ClientContext.create(Configuration.global())).build(); // On heartbeat, the expected values will be checked against List expectedLostBlocks = ImmutableList.of(blockToRemove); Map> expectedAddedBlocks = ImmutableMap.of(); @@ -528,7 +528,7 @@ private void prepareBlocksOnWorker(String tierConfig) { // Generate block IDs heuristically Map> tierConfigMap = 
parseTierConfig(tierConfig); Map> blockMap = - RpcBenchPreparationUtils.generateBlockIdOnTiers(tierConfigMap); + RpcBenchPreparationUtils.generateBlockIdOnTiers(tierConfigMap); for (Map.Entry> entry : blockMap.entrySet()) { BlockStoreLocation loc = entry.getKey(); diff --git a/tests/src/test/java/alluxio/client/fs/RegisterStreamTestUtils.java b/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java similarity index 58% rename from tests/src/test/java/alluxio/client/fs/RegisterStreamTestUtils.java rename to tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java index b00c1ad87bcc..e66d22f94ee4 100644 --- a/tests/src/test/java/alluxio/client/fs/RegisterStreamTestUtils.java +++ b/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.client.fs; +package alluxio.server.block; import static alluxio.stress.cli.RpcBenchPreparationUtils.CAPACITY; import static alluxio.stress.rpc.TierAlias.MEM; @@ -19,6 +19,7 @@ import alluxio.grpc.LocationBlockIdListEntry; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; +import alluxio.master.block.BlockMasterWorkerServiceHandler; import alluxio.stress.cli.RpcBenchPreparationUtils; import alluxio.stress.rpc.TierAlias; import alluxio.wire.WorkerNetAddress; @@ -30,40 +31,47 @@ import io.grpc.stub.StreamObserver; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Queue; import java.util.stream.Collectors; public class RegisterStreamTestUtils { - static final long MEM_CAPACITY_BYTES = 20_000_000L; - static final Map> LOST_STORAGE = - ImmutableMap.of(MEM.toString(), ImmutableList.of()); - static final List EMPTY_CONFIG = ImmutableList.of(); - static final WorkerNetAddress NET_ADDRESS_1 = new WorkerNetAddress() - 
.setHost("localhost").setRpcPort(80).setDataPort(81).setWebPort(82); - static final String TIER_CONFIG = "100,200,300;1000,1500;2000"; - static final int TIER_BLOCK_TOTAL = 100 + 200 + 300 + 1000 + 1500 + 2000; - static final int BATCH_SIZE = 1000; private static final long MEM_USAGE = 20_000L; private static final long SSD_USAGE = 500_000L; private static final long HDD_USAGE = 1_000_000L; - static final Map USAGE_MAP = ImmutableMap.of("MEM", MEM_USAGE, + + public static final long MEM_CAPACITY_BYTES = 20_000_000L; + public static final Map> LOST_STORAGE = + ImmutableMap.of(MEM.toString(), ImmutableList.of()); + public static final List EMPTY_CONFIG = ImmutableList.of(); + public static final WorkerNetAddress NET_ADDRESS_1 = new WorkerNetAddress() + .setHost("localhost").setRpcPort(80).setDataPort(81).setWebPort(82); + public static final String TIER_CONFIG = "100,200,300;1000,1500;2000"; + public static final int TIER_BLOCK_TOTAL = 100 + 200 + 300 + 1000 + 1500 + 2000; + public static final int BATCH_SIZE = 1000; + public static final Map USAGE_MAP = ImmutableMap.of("MEM", MEM_USAGE, "SSD", SSD_USAGE, "HDD", HDD_USAGE); - static final Map CAPACITY_MAP = ImmutableMap.of("MEM", CAPACITY, + public static final Map CAPACITY_MAP = ImmutableMap.of("MEM", CAPACITY, "SSD", CAPACITY, "HDD", CAPACITY); - static final Map MEM_CAPACITY = ImmutableMap.of("MEM", MEM_CAPACITY_BYTES); - static final Map MEM_USAGE_EMPTY = ImmutableMap.of("MEM", 0L); + public static final Map MEM_CAPACITY = ImmutableMap.of("MEM", MEM_CAPACITY_BYTES); + public static final Map MEM_USAGE_EMPTY = ImmutableMap.of("MEM", 0L); public static List generateRegisterStreamForEmptyWorker(long workerId) { + return generateRegisterStreamForEmptyWorker(workerId, MEM_CAPACITY_BYTES); + } + + public static List generateRegisterStreamForEmptyWorker( + long workerId, long capacity) { String tierConfig = ""; // Generate block IDs heuristically Map> blockMap = 
RpcBenchPreparationUtils.generateBlockIdOnTiers(parseTierConfig(tierConfig)); RegisterStreamer registerStreamer = new RegisterStreamer(null, - workerId, ImmutableList.of("MEM"), MEM_CAPACITY, MEM_USAGE_EMPTY, + workerId, ImmutableList.of("MEM"), ImmutableMap.of("MEM", capacity), MEM_USAGE_EMPTY, blockMap, LOST_STORAGE, EMPTY_CONFIG); // For an empty worker there is only 1 request @@ -73,11 +81,12 @@ public static List generateRegisterStreamForEmptyWorker( return requestChunks; } - static List getTierAliases(Map> tierConfig) { + public static List getTierAliases(Map> tierConfig) { return tierConfig.keySet().stream().map(TierAlias::toString).collect(Collectors.toList()); } - public static List generateRegisterStreamForWorker(long workerId) { + public static List generateRegisterStreamForWorkerWithTiers( + long workerId) { List tierAliases = getTierAliases(parseTierConfig(TIER_CONFIG)); // Generate block IDs heuristically Map> tierConfigMap = parseTierConfig(TIER_CONFIG); @@ -96,6 +105,27 @@ public static List generateRegisterStreamForWorker(long return requestChunks; } + public static List generateRegisterStreamForWorkerWithBlocks( + long workerId, long blockSize, List blockList) { + Map> blockMap = new HashMap<>(); + BlockStoreLocation mem = new BlockStoreLocation("MEM", 0); + blockMap.put(mem, blockList); + + // We just use the RegisterStreamer to generate the batch of requests + RegisterStreamer registerStreamer = new RegisterStreamer(null, + workerId, ImmutableList.of("MEM"), + ImmutableMap.of("MEM", CAPACITY), // capacity + ImmutableMap.of("MEM", blockSize * blockList.size()), // usage + blockMap, LOST_STORAGE, EMPTY_CONFIG); + + // Get chunks from the RegisterStreamer + List requestChunks = ImmutableList.copyOf(registerStreamer); + int expectedBatchCount = (int) Math.ceil((blockList.size()) / (double) BATCH_SIZE); + assertEquals(expectedBatchCount, requestChunks.size()); + + return requestChunks; + } + public static Map> parseTierConfig(String tiersConfig) { 
String[] tiers = tiersConfig.split(";"); if (tiers.length == 1 && "".equals(tiers[0])) { @@ -104,16 +134,13 @@ public static Map> parseTierConfig(String tiersConfig) int length = Math.min(tiers.length, TierAlias.values().length); ImmutableMap.Builder> builder = new ImmutableMap.Builder<>(); for (int i = 0; i < length; i++) { - builder.put( - TierAlias.SORTED.get(i), - Arrays.stream(tiers[i].split(",")) - .map(Integer::parseInt) - .collect(Collectors.toList())); + builder.put(TierAlias.SORTED.get(i), + Arrays.stream(tiers[i].split(",")).map(Integer::parseInt).collect(Collectors.toList())); } return builder.build(); } - static StreamObserver getErrorCapturingResponseObserver( + public static StreamObserver getErrorCapturingResponseObserver( Queue errorQueue) { return new StreamObserver() { @Override @@ -129,9 +156,20 @@ public void onCompleted() {} }; } - static long findFirstBlock(List chunks) { + public static long findFirstBlock(List chunks) { RegisterWorkerPRequest firstBatch = chunks.get(0); LocationBlockIdListEntry entry = firstBatch.getCurrentBlocks(0); return entry.getValue().getBlockId(0); } + + public static void sendStreamToMaster(BlockMasterWorkerServiceHandler handler, + List requestChunks, + StreamObserver responseObserver) { + StreamObserver requestObserver = + handler.registerWorkerStream(responseObserver); + for (RegisterWorkerPRequest chunk : requestChunks) { + requestObserver.onNext(chunk); + } + requestObserver.onCompleted(); + } } From ad2fcfbacc8fc4f04b279f42a9f70e62621b1cf0 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Mon, 16 Jan 2023 10:55:41 +0800 Subject: [PATCH 062/334] Add missing GuardedBy and volatile for worker stream register The locked section is here https://github.com/Alluxio/alluxio/blob/c3dcda455e081ef99345f2d906e7899652b3a2e6/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java#L78 The object is locked to init the context. This change only adds the missing tag. 
The locking logic has been in for a long time. pr-link: Alluxio/alluxio#16760 change-id: cid-7c5ef1f6239f46328700178bca295db70aed0d0b --- .../java/alluxio/master/block/RegisterStreamObserver.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java b/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java index 30ff4104cf98..03b45d98b849 100644 --- a/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java +++ b/core/server/master/src/main/java/alluxio/master/block/RegisterStreamObserver.java @@ -12,6 +12,7 @@ package alluxio.master.block; import alluxio.RpcUtils; +import alluxio.annotation.SuppressFBWarnings; import alluxio.conf.PropertyKey; import alluxio.exception.status.DeadlineExceededException; import alluxio.grpc.GrpcExceptionUtils; @@ -25,6 +26,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.GuardedBy; /** * This class handles the master side logic of the register stream. @@ -36,7 +38,11 @@ public class RegisterStreamObserver implements StreamObserver { private static final Logger LOG = LoggerFactory.getLogger(RegisterStreamObserver.class); - private WorkerRegisterContext mContext; + @GuardedBy("this") + @SuppressFBWarnings(value = "IS_FIELD_NOT_GUARDED") + // Context is initialized on the 1st request so later requests are guaranteed to see the context + // Locking is applied on init and cleanup + private volatile WorkerRegisterContext mContext; private final BlockMaster mBlockMaster; // Used to send responses to the worker private final StreamObserver mMasterResponseObserver; From 550533507b1490471c90647f3da5c5440aa16b15 Mon Sep 17 00:00:00 2001 From: Yaolong Liu Date: Mon, 16 Jan 2023 12:13:51 +0800 Subject: [PATCH 063/334] Bump xstream version to 1.4.20 ### Why are the changes needed? 
This should address [CVE-2022-40151](https://nvd.nist.gov/vuln/detail/CVE-2022-40151) ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#16756 change-id: cid-cb8d596add37d50cf8adc2659701365d9295dcb1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1d133be1a8e7..8f67e7c6c260 100644 --- a/pom.xml +++ b/pom.xml @@ -797,7 +797,7 @@ xstream com.thoughtworks.xstream - 1.4.19 + 1.4.20 test From 4ba25882c090f338fe435a71b8a6c77dc69c801c Mon Sep 17 00:00:00 2001 From: kimsu98 <40134677+kimsu98@users.noreply.github.com> Date: Sun, 15 Jan 2023 20:18:08 -0800 Subject: [PATCH 064/334] [DOCFIX] Fix docGen and update config/metrics ### What changes are proposed in this pull request? Fix docGen and update configuration/metrics Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? To keep propertykeys and metrics updated Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? yes. Docs. Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#16766 change-id: cid-04b5e8de962ac7a76a54ae222328c41195257f82 --- docs/_data/table/client-metrics.csv | 2 ++ docs/_data/table/common-configuration.csv | 21 +++++++++-- docs/_data/table/en/client-metrics.yml | 4 +++ docs/_data/table/en/common-configuration.yml | 38 ++++++++++++++++++-- docs/_data/table/en/master-configuration.yml | 18 ++++++++-- docs/_data/table/en/master-metrics.yml | 12 +++++++ docs/_data/table/en/proxy-metrics.yml | 8 +++++ docs/_data/table/en/user-configuration.yml | 2 ++ docs/_data/table/master-configuration.csv | 8 ++++- docs/_data/table/master-metrics.csv | 6 ++++ docs/_data/table/proxy-metrics.csv | 4 +++ docs/_data/table/user-configuration.csv | 1 + 12 files changed, 116 insertions(+), 8 deletions(-) diff --git a/docs/_data/table/client-metrics.csv b/docs/_data/table/client-metrics.csv index 983a4c057d1a..3290cdfa046f 100644 --- a/docs/_data/table/client-metrics.csv +++ b/docs/_data/table/client-metrics.csv @@ -56,6 +56,8 @@ Client.CacheStoreDeleteTimeout,COUNTER Client.CacheStoreGetTimeout,COUNTER Client.CacheStorePutTimeout,COUNTER Client.CacheStoreThreadsRejected,COUNTER +Client.CloseAlluxioOutStreamLatency,TIMER +Client.CloseUFSOutStreamLatency,TIMER Client.DefaultHiveClientCount,COUNTER Client.FileSystemMasterClientCount,COUNTER Client.MetadataCacheSize,GAUGE diff --git a/docs/_data/table/common-configuration.csv b/docs/_data/table/common-configuration.csv index 76fde1545838..45b3c91a7b2f 100644 --- a/docs/_data/table/common-configuration.csv +++ b/docs/_data/table/common-configuration.csv @@ -11,7 +11,7 @@ alluxio.fuse.jnifuse.enabled,"true" alluxio.fuse.jnifuse.libfuse.version,"2" alluxio.fuse.logging.threshold,"10s" alluxio.fuse.mount.alluxio.path,"/" -alluxio.fuse.mount.options,"direct_io" +alluxio.fuse.mount.options,"attr_timeout=600,entry_timeout=600" alluxio.fuse.mount.point,"/mnt/alluxio-fuse" alluxio.fuse.shared.caching.reader.enabled,"false" alluxio.fuse.special.command.enabled,"false" @@ -22,10 
+22,12 @@ alluxio.fuse.web.bind.host,"0.0.0.0" alluxio.fuse.web.enabled,"false" alluxio.fuse.web.hostname,"" alluxio.fuse.web.port,"49999" +alluxio.grpc.reflection.enabled,"false" alluxio.hadoop.kerberos.keytab.login.autorenewal,"" alluxio.hadoop.security.authentication,"" alluxio.hadoop.security.krb5.conf,"" alluxio.home,"/opt/alluxio" +alluxio.job.batch.size,"20" alluxio.job.master.bind.host,"0.0.0.0" alluxio.job.master.client.threads,"1024" alluxio.job.master.embedded.journal.addresses,"" @@ -48,6 +50,7 @@ alluxio.job.master.web.port,"20002" alluxio.job.master.worker.heartbeat.interval,"1sec" alluxio.job.master.worker.timeout,"60sec" alluxio.job.request.batch.size,"20" +alluxio.job.retention.time,"1d" alluxio.job.worker.bind.host,"0.0.0.0" alluxio.job.worker.data.port,"30002" alluxio.job.worker.hostname,"${alluxio.worker.hostname}" @@ -105,6 +108,15 @@ alluxio.site.conf.rocks.inode.file,"" alluxio.standalone.fuse.jvm.monitor.enabled,"false" alluxio.standby.master.metrics.sink.enabled,"false" alluxio.standby.master.web.enabled,"false" +alluxio.table.catalog.path,"/catalog" +alluxio.table.catalog.udb.sync.timeout,"1h" +alluxio.table.enabled,"true" +alluxio.table.journal.partitions.chunk.size,"500" +alluxio.table.load.default.replication,"1" +alluxio.table.transform.manager.job.history.retention.time,"300sec" +alluxio.table.transform.manager.job.monitor.interval,"10s" +alluxio.table.udb.hive.clientpool.MAX,"256" +alluxio.table.udb.hive.clientpool.min,"16" alluxio.test.deprecated.key,"" alluxio.tmp.dirs,"/tmp" alluxio.underfs.allow.set.owner.failure,"false" @@ -155,7 +167,12 @@ alluxio.underfs.object.store.skip.parent.directory.creation,"true" alluxio.underfs.oss.connection.max,"1024" alluxio.underfs.oss.connection.timeout,"50sec" alluxio.underfs.oss.connection.ttl,"-1" +alluxio.underfs.oss.ecs.ram.role,"" +alluxio.underfs.oss.retry.max,"3" alluxio.underfs.oss.socket.timeout,"50sec" 
+alluxio.underfs.oss.sts.ecs.metadata.service.endpoint,"http://100.100.100.200/latest/meta-data/ram/security-credentials/" +alluxio.underfs.oss.sts.enabled,"false" +alluxio.underfs.oss.sts.token.refresh.interval.ms,"30m" alluxio.underfs.ozone.prefixes,"o3fs://,ofs://" alluxio.underfs.persistence.async.temp.dir,".alluxio_ufs_persistence" alluxio.underfs.s3.admin.threads.max,"20" @@ -187,11 +204,11 @@ alluxio.underfs.web.connnection.timeout,"60s" alluxio.underfs.web.header.last.modified,"EEE, dd MMM yyyy HH:mm:ss zzz" alluxio.underfs.web.parent.names,"Parent Directory,..,../" alluxio.underfs.web.titles,"Index of,Directory listing for" -alluxio.web.cors.enabled,"false" alluxio.web.cors.allow.credential,"false" alluxio.web.cors.allow.headers,"*" alluxio.web.cors.allow.methods,"*" alluxio.web.cors.allow.origins,"*" +alluxio.web.cors.enabled,"false" alluxio.web.cors.exposed.headers,"*" alluxio.web.cors.max.age,"-1" alluxio.web.file.info.enabled,"true" diff --git a/docs/_data/table/en/client-metrics.yml b/docs/_data/table/en/client-metrics.yml index 8a6d49110308..3809a62d01ac 100644 --- a/docs/_data/table/en/client-metrics.yml +++ b/docs/_data/table/en/client-metrics.yml @@ -112,6 +112,10 @@ Client.CacheStorePutTimeout: 'Number of timeouts when writing new pages to page store.' Client.CacheStoreThreadsRejected: 'Number of rejection of I/O threads on submitting tasks to thread pool, likely due to unresponsive local file system.' +Client.CloseAlluxioOutStreamLatency: + 'Latency of close Alluxio outstream latency' +Client.CloseUFSOutStreamLatency: + 'Latency of close UFS outstream latency' Client.DefaultHiveClientCount: 'Number of instances in the DefaultHiveClientPool.' 
Client.FileSystemMasterClientCount: diff --git a/docs/_data/table/en/common-configuration.yml b/docs/_data/table/en/common-configuration.yml index a689a848f8fc..51c7b5273aa8 100644 --- a/docs/_data/table/en/common-configuration.yml +++ b/docs/_data/table/en/common-configuration.yml @@ -44,6 +44,8 @@ alluxio.fuse.web.hostname: 'The hostname of Alluxio FUSE web UI.' alluxio.fuse.web.port: 'The port Alluxio FUSE web UI runs on.' +alluxio.grpc.reflection.enabled: + 'If true, grpc reflection will be enabled on alluxio grpc servers, including masters, workers, job masters and job workers. This makes grpc tools such as grpcurl or grpcui can send grpc requests to the master server easier without knowing the protobufs. This is a debug option.' alluxio.hadoop.kerberos.keytab.login.autorenewal: 'Kerberos authentication keytab login auto renew.' alluxio.hadoop.security.authentication: @@ -52,6 +54,8 @@ alluxio.hadoop.security.krb5.conf: 'Kerberos krb file for configuration of Kerberos.' alluxio.home: 'Alluxio installation directory.' +alluxio.job.batch.size: + 'The number of tasks would be included in a job request.' alluxio.job.master.bind.host: 'The host that the Alluxio job master will bind to.' alluxio.job.master.client.threads: @@ -96,6 +100,8 @@ alluxio.job.master.worker.timeout: 'The time period after which the job master will mark a worker as lost without a subsequent heartbeat.' alluxio.job.request.batch.size: 'The batch size client uses to make requests to the job master.' +alluxio.job.retention.time: + 'The length of time the Alluxio should save information about completed jobs before they are discarded.' alluxio.job.worker.bind.host: 'The host that the Alluxio job worker will bind to.' 
alluxio.job.worker.data.port: @@ -210,6 +216,24 @@ alluxio.standby.master.metrics.sink.enabled: 'Whether a standby master runs the metric sink' alluxio.standby.master.web.enabled: 'Whether a standby master runs a web server' +alluxio.table.catalog.path: + 'The Alluxio file path for the table catalog metadata.' +alluxio.table.catalog.udb.sync.timeout: + 'The timeout period for a db sync to finish in the catalog. If a synctakes longer than this timeout, the sync will be terminated.' +alluxio.table.enabled: + '(Experimental) Enables the table service.' +alluxio.table.journal.partitions.chunk.size: + 'The maximum table partitions number in a single journal entry.' +alluxio.table.load.default.replication: + 'The default replication number of files under the SDS table after load option.' +alluxio.table.transform.manager.job.history.retention.time: + 'The length of time the Alluxio Table Master should keep information about finished transformation jobs before they are discarded.' +alluxio.table.transform.manager.job.monitor.interval: + 'Job monitor is a heartbeat thread in the transform manager, this is the time interval in milliseconds the job monitor heartbeat is run to check the status of the transformation jobs and update table and partition locations after transformation.' +alluxio.table.udb.hive.clientpool.MAX: + 'The maximum capacity of the hive client pool per hive metastore' +alluxio.table.udb.hive.clientpool.min: + 'The minimum capacity of the hive client pool per hive metastore' alluxio.test.deprecated.key: 'N/A' alluxio.tmp.dirs: @@ -310,8 +334,18 @@ alluxio.underfs.oss.connection.timeout: 'The timeout when connecting to OSS.' alluxio.underfs.oss.connection.ttl: 'The TTL of OSS connections in ms.' +alluxio.underfs.oss.ecs.ram.role: + 'The RAM role of current owner of ECS.' +alluxio.underfs.oss.retry.max: + 'The maximum number of OSS error retry.' alluxio.underfs.oss.socket.timeout: 'The timeout of OSS socket.' 
+alluxio.underfs.oss.sts.ecs.metadata.service.endpoint: + 'The ECS metadata service endpoint for Aliyun STS' +alluxio.underfs.oss.sts.enabled: + 'Whether to enable oss STS(Security Token Service).' +alluxio.underfs.oss.sts.token.refresh.interval.ms: + 'Time before an OSS Security Token is considered expired and will be automatically renewed' alluxio.underfs.ozone.prefixes: 'Specify which prefixes should run through the Ozone implementation of UnderFileSystem. The delimiter is any whitespace and/or '',''. The default value is "o3fs://,ofs://".' alluxio.underfs.persistence.async.temp.dir: @@ -383,11 +417,11 @@ alluxio.web.cors.allow.methods: alluxio.web.cors.allow.origins: 'Which origins is allowed for cors. use * allow all any origin.' alluxio.web.cors.enabled: - 'Set to true to enable Cross-Origin Resource Sharing for Web UI.' + 'Set to true to enable Cross-Origin Resource Sharing for RESTful APIendpoints.' alluxio.web.cors.exposed.headers: 'Which headers are allowed to set in response when access cross-origin resource. use * allow all any header.' alluxio.web.cors.max.age: - 'Maximum number of seconds the results can be cached for cors. "-1 means no cache.' + 'Maximum number of seconds the results can be cached. -1 means no cache.' alluxio.web.file.info.enabled: 'Whether detailed file information are enabled for the web UI.' alluxio.web.refresh.interval: diff --git a/docs/_data/table/en/master-configuration.yml b/docs/_data/table/en/master-configuration.yml index 0bce4b0e2f96..b0abf5619b01 100644 --- a/docs/_data/table/en/master-configuration.yml +++ b/docs/_data/table/en/master-configuration.yml @@ -66,6 +66,8 @@ alluxio.master.embedded.journal.raft.client.request.interval: 'Base interval for retrying Raft client calls. The retry policy is ExponentialBackoffRetry' alluxio.master.embedded.journal.raft.client.request.timeout: 'Time after which calls made through the Raft client timeout.' 
+alluxio.master.embedded.journal.ratis.config: + 'Prefix for Apache Ratis internal configuration options. For example, setting alluxio.master.embedded.journal.ratis.config.raft.server.rpc.request.timeout will set ratis.config.raft.server.rpc.request.timeout on the Ratis service in the Alluxio master.' alluxio.master.embedded.journal.retry.cache.expiry.time: 'The time for embedded journal server retry cache to expire. Setting a bigger value allows embedded journal server to cache the responses for a longer time in case of journal writer retries, but will take up more memory in master.' alluxio.master.embedded.journal.snapshot.replication.chunk.size: @@ -74,6 +76,8 @@ alluxio.master.embedded.journal.transport.max.inbound.message.size: 'The maximum size of a message that can be sent to the embedded journal server node.' alluxio.master.embedded.journal.transport.request.timeout.ms: 'The duration after which embedded journal masters will timeout messages sent between each other. Lower values might cause leadership instability when the network is slow.' +alluxio.master.embedded.journal.unsafe.flush.enabled: + 'If true, embedded journal entries will be committed without waiting for the entry to be flushed to disk. This may improve performance of write operations on the Alluxio master if the journal is written to a slow or contested disk. WARNING: enabling this property may result in metadata loss if half or more of the master nodes fail. See Ratis property raft.server.log.unsafe-flush.enabled at https://github.com/apache/ratis/blob/master/ratis-docs/src/site/markdown/configuraions.md.' alluxio.master.embedded.journal.write.timeout: 'Maximum time to wait for a write/flush on embedded journal.' alluxio.master.file.access.time.journal.flush.interval: @@ -156,6 +160,8 @@ alluxio.master.lock.pool.low.watermark: 'Low watermark of lock pool size. 
When the size grows over the high watermark, a background thread will try to evict unused locks until the size reaches the low watermark.' alluxio.master.log.config.report.heartbeat.interval: 'The interval for periodically logging the configuration check report.' +alluxio.master.lost.worker.deletion.timeout: + 'If a worker has no heartbeat with the master for more than this timeout, the master will totally forget this worker.' alluxio.master.lost.worker.detection.interval: 'The interval between Alluxio master detections to find lost workers based on updates from Alluxio workers.' alluxio.master.lost.worker.file.detection.interval: @@ -168,6 +174,8 @@ alluxio.master.metadata.sync.concurrency.level: 'The maximum number of concurrent sync tasks running for a given sync operation' alluxio.master.metadata.sync.executor.pool.size: 'The number of threads used to execute all metadata syncoperations' +alluxio.master.metadata.sync.ignore.ttl: + 'Whether files created from metadata sync will ignore the TTL from the command/path conf and have no TTL.' alluxio.master.metadata.sync.instrument.executor: 'If true the metadata sync thread pool executors will be instrumented with additional metrics.' alluxio.master.metadata.sync.lock.pool.concurrency.level: @@ -192,6 +200,10 @@ alluxio.master.metastore.block: 'The type of block metastore to use, either HEAP or ROCKS. By default this uses alluxio.master.metastore.' alluxio.master.metastore.dir: 'The metastore work directory. Only some metastores need disk.' +alluxio.master.metastore.dir.block: + 'If the metastore is ROCKS, this property controls where the RocksDB stores block metadata. This property defaults to alluxio.master.metastore.dir. And it can be used to change block metadata storage path to a different disk to improve RocksDB performance.' +alluxio.master.metastore.dir.inode: + 'If the metastore is ROCKS, this property controls where the RocksDB stores inode metadata. This property defaults to alluxio.master.metastore.dir. 
And it can be used to change inode metadata storage path to a different disk to improve RocksDB performance.' alluxio.master.metastore.inode: 'The type of inode metastore to use, either HEAP or ROCKS. By default this uses alluxio.master.metastore.' alluxio.master.metastore.inode.cache.evict.batch.size: @@ -228,6 +240,8 @@ alluxio.master.metastore.rocks.block.meta.cache.size: 'The capacity in bytes of the RocksDB block metadata table LRU cache. If unset, the RocksDB default will be used. See https://github.com/facebook/rocksdb/wiki/Block-Cache' alluxio.master.metastore.rocks.block.meta.index: 'The index type to be used in the RocksDB block metadata table. If unset, the RocksDB default will be used. See https://github.com/facebook/rocksdb/wiki/Index-Block-Format' +alluxio.master.metastore.rocks.checkpoint.compression.level: + 'The zip compression level of checkpointing rocksdb, the zip format defines ten levels of compression, ranging from 0 (no compression, but very fast) to 9 (best compression, but slow). Or -1 for the system default compression level.' alluxio.master.metastore.rocks.edge.block.index: 'The block index type to be used in the RocksDB inode edge table. If unset, the RocksDB default will be used. See https://rocksdb.org/blog/2018/08/23/data-block-hash-index.html' alluxio.master.metastore.rocks.edge.bloom.filter: @@ -245,9 +259,7 @@ alluxio.master.metastore.rocks.inode.cache.size: alluxio.master.metastore.rocks.inode.index: 'The index type to be used in the RocksDB Inode table. If unset, the RocksDB default will be used. 
See https://github.com/facebook/rocksdb/wiki/Index-Block-Format' alluxio.master.metastore.rocks.parallel.backup: - 'Whether to backup rocksdb in parallel' -alluxio.master.metastore.rocks.parallel.backup.compression.level: - 'The zip compression level of backing up rocksdb in parallel, the zip format defines ten levels of compression, ranging from 0 (no compression, but very fast) to 9 (best compression, but slow)' + 'Whether to checkpoint rocksdb in parallel using the number of threads set by alluxio.master.metastore.rocks.parallel.backup.threads.' alluxio.master.metastore.rocks.parallel.backup.threads: 'The number of threads used by backing up rocksdb in parallel.' alluxio.master.metrics.file.size.distribution.buckets: diff --git a/docs/_data/table/en/master-metrics.yml b/docs/_data/table/en/master-metrics.yml index 3e658c54faec..ed6fcedccd19 100644 --- a/docs/_data/table/en/master-metrics.yml +++ b/docs/_data/table/en/master-metrics.yml @@ -106,6 +106,8 @@ Master.JobCount: 'The number of all status job' Master.JobCreated: 'The number of created status job' +Master.JobDistributedLoadBlockSizes: + 'The total block size loaded by load commands' Master.JobDistributedLoadCancel: 'The number of cancelled DistributedLoad operations' Master.JobDistributedLoadFail: @@ -120,6 +122,16 @@ Master.JobDistributedLoadSuccess: 'The number of successful DistributedLoad operations' Master.JobFailed: 'The number of failed status job' +Master.JobLoadBlockCount: + 'The number of blocks loaded by load commands' +Master.JobLoadBlockFail: + 'The number of blocks failed to be loaded by load commands' +Master.JobLoadFail: + 'The number of failed Load commands' +Master.JobLoadRate: + 'The average loading rate of Load commands' +Master.JobLoadSuccess: + 'The number of successful Load commands' Master.JobRunning: 'The number of running status job' Master.JournalCheckpointWarn: diff --git a/docs/_data/table/en/proxy-metrics.yml b/docs/_data/table/en/proxy-metrics.yml index 
62965ef1fb80..0cc3dca05852 100644 --- a/docs/_data/table/en/proxy-metrics.yml +++ b/docs/_data/table/en/proxy-metrics.yml @@ -1,2 +1,10 @@ Proxy.AuditLogEntriesSize: 'The size of the audit log entries blocking queue' +Proxy.CheckUploadIDStatusLatency: + 'Latency of check uploadId status in CompleteMultipartUpload' +Proxy.CleanupMultipartUploadLatency: + 'Latency of cleaning up temp folder and metafile from CompleteMultipartUpload' +Proxy.CleanupTempMultipartUploadObjectLatency: + 'Latency of cleaning up temp target obj duringCompleteMultipartUpload' +Proxy.CompleteMPUploadMergeLatency: + 'Latency of merging parts into one objectin CompleteMultipartUpload' diff --git a/docs/_data/table/en/user-configuration.yml b/docs/_data/table/en/user-configuration.yml index f753790dbf11..65265e01fe3c 100644 --- a/docs/_data/table/en/user-configuration.yml +++ b/docs/_data/table/en/user-configuration.yml @@ -88,6 +88,8 @@ alluxio.user.client.cache.timeout.duration: 'The timeout duration for local cache I/O operations (reading/writing/deleting). When this property is a positive value,local cache operations after timing out will fail and fallback to external file system but transparent to applications; when this property is a negative value, this feature is disabled.' alluxio.user.client.cache.timeout.threads: 'The number of threads to handle cache I/O operation timeout, when alluxio.user.client.cache.timeout.duration is positive.' +alluxio.user.client.report.version.enabled: + 'Whether the client reports version information to the server.' alluxio.user.conf.cluster.default.enabled: 'When this property is true, an Alluxio client will load the default values of cluster-wide configuration and path-specific configuration set by Alluxio master.' 
alluxio.user.conf.sync.interval: diff --git a/docs/_data/table/master-configuration.csv b/docs/_data/table/master-configuration.csv index 62e6be06bb70..c9c4a5ea311c 100644 --- a/docs/_data/table/master-configuration.csv +++ b/docs/_data/table/master-configuration.csv @@ -33,10 +33,12 @@ alluxio.master.embedded.journal.flush.size.max,"160MB" alluxio.master.embedded.journal.port,"19200" alluxio.master.embedded.journal.raft.client.request.interval,"100ms" alluxio.master.embedded.journal.raft.client.request.timeout,"60sec" +alluxio.master.embedded.journal.ratis.config,"" alluxio.master.embedded.journal.retry.cache.expiry.time,"60s" alluxio.master.embedded.journal.snapshot.replication.chunk.size,"4MB" alluxio.master.embedded.journal.transport.max.inbound.message.size,"100MB" alluxio.master.embedded.journal.transport.request.timeout.ms,"5sec" +alluxio.master.embedded.journal.unsafe.flush.enabled,"false" alluxio.master.embedded.journal.write.timeout,"30sec" alluxio.master.file.access.time.journal.flush.interval,"1h" alluxio.master.file.access.time.update.precision,"1d" @@ -78,12 +80,14 @@ alluxio.master.lock.pool.high.watermark,"1000000" alluxio.master.lock.pool.initsize,"1000" alluxio.master.lock.pool.low.watermark,"500000" alluxio.master.log.config.report.heartbeat.interval,"1h" +alluxio.master.lost.worker.deletion.timeout,"30min" alluxio.master.lost.worker.detection.interval,"10sec" alluxio.master.lost.worker.file.detection.interval,"5min" alluxio.master.merge.journal.context.num.entries.logging.threshold,"10000" alluxio.master.metadata.concurrent.sync.dedup,"false" alluxio.master.metadata.sync.concurrency.level,"6" alluxio.master.metadata.sync.executor.pool.size,"The total number of threads which can concurrently execute metadata sync operations." 
+alluxio.master.metadata.sync.ignore.ttl,"false" alluxio.master.metadata.sync.instrument.executor,"false" alluxio.master.metadata.sync.lock.pool.concurrency.level,"20" alluxio.master.metadata.sync.lock.pool.high.watermark,"50000" @@ -96,6 +100,8 @@ alluxio.master.metadata.sync.ufs.prefetch.timeout,"100ms" alluxio.master.metastore,"ROCKS" alluxio.master.metastore.block,"ROCKS" alluxio.master.metastore.dir,"${alluxio.work.dir}/metastore" +alluxio.master.metastore.dir.block,"${alluxio.master.metastore.dir}" +alluxio.master.metastore.dir.inode,"${alluxio.master.metastore.dir}" alluxio.master.metastore.inode,"ROCKS" alluxio.master.metastore.inode.cache.evict.batch.size,"1000" alluxio.master.metastore.inode.cache.high.water.mark.ratio,"0.85" @@ -114,6 +120,7 @@ alluxio.master.metastore.rocks.block.meta.block.index,"" alluxio.master.metastore.rocks.block.meta.bloom.filter,"false" alluxio.master.metastore.rocks.block.meta.cache.size,"" alluxio.master.metastore.rocks.block.meta.index,"" +alluxio.master.metastore.rocks.checkpoint.compression.level,"-1" alluxio.master.metastore.rocks.edge.block.index,"" alluxio.master.metastore.rocks.edge.bloom.filter,"false" alluxio.master.metastore.rocks.edge.cache.size,"" @@ -123,7 +130,6 @@ alluxio.master.metastore.rocks.inode.bloom.filter,"false" alluxio.master.metastore.rocks.inode.cache.size,"" alluxio.master.metastore.rocks.inode.index,"" alluxio.master.metastore.rocks.parallel.backup,"false" -alluxio.master.metastore.rocks.parallel.backup.compression.level,"6" alluxio.master.metastore.rocks.parallel.backup.threads,"The default number of threads used by backing up rocksdb in parallel." 
alluxio.master.metrics.file.size.distribution.buckets,"1KB,1MB,10MB,100MB,1GB,10GB" alluxio.master.metrics.heap.enabled,"false" diff --git a/docs/_data/table/master-metrics.csv b/docs/_data/table/master-metrics.csv index 4c68999fb50d..194dc2ff18c0 100644 --- a/docs/_data/table/master-metrics.csv +++ b/docs/_data/table/master-metrics.csv @@ -53,6 +53,7 @@ Master.JobCanceled,COUNTER Master.JobCompleted,COUNTER Master.JobCount,GAUGE Master.JobCreated,COUNTER +Master.JobDistributedLoadBlockSizes,COUNTER Master.JobDistributedLoadCancel,COUNTER Master.JobDistributedLoadFail,COUNTER Master.JobDistributedLoadFileCount,COUNTER @@ -60,6 +61,11 @@ Master.JobDistributedLoadFileSizes,COUNTER Master.JobDistributedLoadRate,METER Master.JobDistributedLoadSuccess,COUNTER Master.JobFailed,COUNTER +Master.JobLoadBlockCount,COUNTER +Master.JobLoadBlockFail,COUNTER +Master.JobLoadFail,COUNTER +Master.JobLoadRate,METER +Master.JobLoadSuccess,COUNTER Master.JobRunning,COUNTER Master.JournalCheckpointWarn,GAUGE Master.JournalEntriesSinceCheckPoint,GAUGE diff --git a/docs/_data/table/proxy-metrics.csv b/docs/_data/table/proxy-metrics.csv index cf97f2ac00b6..28ae58c08786 100644 --- a/docs/_data/table/proxy-metrics.csv +++ b/docs/_data/table/proxy-metrics.csv @@ -1,2 +1,6 @@ metricName,metricType Proxy.AuditLogEntriesSize,GAUGE +Proxy.CheckUploadIDStatusLatency,TIMER +Proxy.CleanupMultipartUploadLatency,TIMER +Proxy.CleanupTempMultipartUploadObjectLatency,TIMER +Proxy.CompleteMPUploadMergeLatency,TIMER diff --git a/docs/_data/table/user-configuration.csv b/docs/_data/table/user-configuration.csv index 823900f4d392..5688d89b252f 100644 --- a/docs/_data/table/user-configuration.csv +++ b/docs/_data/table/user-configuration.csv @@ -44,6 +44,7 @@ alluxio.user.client.cache.store.overhead,"" alluxio.user.client.cache.store.type,"LOCAL" alluxio.user.client.cache.timeout.duration,"-1" alluxio.user.client.cache.timeout.threads,"32" +alluxio.user.client.report.version.enabled,"false" 
alluxio.user.conf.cluster.default.enabled,"true" alluxio.user.conf.sync.interval,"1min" alluxio.user.date.format.pattern,"MM-dd-yyyy HH:mm:ss:SSS" From 96f145566f26a61f67fd9f3829b74f25596142d7 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Mon, 16 Jan 2023 13:08:23 +0800 Subject: [PATCH 065/334] Refactor rpc server service ### What changes are proposed in this pull request? Did some refactoring to improve the code extensibility ### Why are the changes needed? We want to create more grpc services in the future and this refactoring is an intermediate step. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16774 change-id: cid-0c7f9021c5a28fb02801e8b14042bcf1ca065844 --- .../master/service/rpc/RpcServerService.java | 12 ++- .../service/rpc/RpcServerServiceTest.java | 69 +++------------- .../service/rpc/RpcServerServiceTestBase.java | 79 +++++++++++++++++++ 3 files changed, 100 insertions(+), 60 deletions(-) create mode 100644 core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java index 0d750c22cd85..aa2f3006b8df 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java @@ -17,7 +17,10 @@ import alluxio.grpc.ErrorType; import alluxio.grpc.GrpcServer; import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; import alluxio.master.AlluxioExecutorService; +import alluxio.master.Master; import alluxio.master.MasterProcess; import alluxio.master.MasterRegistry; import alluxio.master.SafeModeManager; @@ -34,8 +37,10 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.Socket; +import java.util.Map; import 
java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; @@ -89,6 +94,11 @@ public synchronized void promote() { Preconditions.checkState(mGrpcServer == null, "rpc server must not be running"); stopRejectingServer(); waitForFree(); + startGrpcServer(Master::getServices); + } + + protected synchronized void startGrpcServer( + Function> serviceProvider) { GrpcServerBuilder builder = mMasterProcess.createBaseRpcServer(); Optional executorService = mMasterProcess.createRpcExecutorService(); if (executorService.isPresent()) { @@ -96,7 +106,7 @@ public synchronized void promote() { mRpcExecutor = executorService.get(); } mMasterRegistry.getServers().forEach(master -> { - master.getServices().forEach((type, service) -> { + serviceProvider.apply(master).forEach((type, service) -> { builder.addService(type, service); LOG.info("registered service {}", type.name()); }); diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java index babc788cea42..2ae2ffac6c34 100644 --- a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java @@ -11,49 +11,20 @@ package alluxio.master.service.rpc; -import alluxio.conf.Configuration; -import alluxio.grpc.GrpcServerAddress; -import alluxio.grpc.GrpcServerBuilder; import alluxio.master.AlluxioMasterProcess; -import alluxio.master.MasterRegistry; -import alluxio.master.PortReservationRule; -import alluxio.util.CommonUtils; -import alluxio.util.WaitForOptions; import org.junit.Assert; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.mockito.Mockito; - -import java.io.IOException; -import java.net.ConnectException; -import java.net.InetSocketAddress; 
-import java.net.Socket; -import java.util.Optional; +import org.junit.runner.RunWith; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; /** * Test for RpcSimpleService. */ -public class RpcServerServiceTest { - @Rule - public PortReservationRule mPort = new PortReservationRule(); - - private final MasterRegistry mRegistry = new MasterRegistry(); - private InetSocketAddress mRpcAddress; - private AlluxioMasterProcess mMasterProcess; - - @Before - public void setUp() { - mRpcAddress = new InetSocketAddress(mPort.getPort()); - mMasterProcess = Mockito.mock(AlluxioMasterProcess.class); - Mockito.when(mMasterProcess.createBaseRpcServer()).thenAnswer(mock -> - GrpcServerBuilder.forAddress(GrpcServerAddress.create(mRpcAddress.getHostName(), - mRpcAddress), Configuration.global())); - Mockito.when(mMasterProcess.createRpcExecutorService()).thenReturn(Optional.empty()); - Mockito.when(mMasterProcess.getSafeModeManager()).thenReturn(Optional.empty()); - } - +@RunWith(PowerMockRunner.class) +@PrepareForTest(AlluxioMasterProcess.class) +public class RpcServerServiceTest extends RpcServerServiceTestBase { @Test public void primaryOnlyTest() { RpcServerService service = @@ -64,19 +35,19 @@ public void primaryOnlyTest() { service.start(); // after start and before stop the rpc port is always bound as either the rpc server or the // rejecting server is bound to is (depending on whether it is in PRIMARY or STANDBY state) - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); Assert.assertFalse(service.isServing()); for (int i = 0; i < 5; i++) { service.promote(); Assert.assertTrue(service.isServing()); - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); service.demote(); - Assert.assertTrue(isBound()); + Assert.assertTrue(isGrpcBound()); Assert.assertFalse(service.isServing()); } service.stop(); Assert.assertFalse(service.isServing()); - Assert.assertFalse(isBound()); + 
Assert.assertFalse(isGrpcBound()); } @Test @@ -99,24 +70,4 @@ public void doubleStartRpcServer() { Assert.assertThrows("rpc server must not be running", IllegalStateException.class, service::promote); } - - private boolean isBound() { - try (Socket socket = new Socket(mRpcAddress.getAddress(), mRpcAddress.getPort())) { - return true; - } catch (ConnectException e) { - return false; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private boolean waitForFree() { - try { - CommonUtils.waitFor("wait for socket to be free", () -> !isBound(), - WaitForOptions.defaults().setTimeoutMs(1_000).setInterval(10)); - return true; - } catch (Exception e) { - return false; - } - } } diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java new file mode 100644 index 000000000000..6165eed72c3a --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTestBase.java @@ -0,0 +1,79 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.service.rpc; + +import alluxio.conf.Configuration; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.master.AlluxioMasterProcess; +import alluxio.master.MasterRegistry; +import alluxio.master.PortReservationRule; +import alluxio.util.CommonUtils; +import alluxio.util.WaitForOptions; + +import org.junit.Before; +import org.junit.Rule; +import org.mockito.Mockito; +import org.powermock.api.mockito.PowerMockito; + +import java.io.IOException; +import java.net.ConnectException; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.util.Optional; + +/** + * Test base RpcService related tests. + */ +public class RpcServerServiceTestBase { + @Rule + public PortReservationRule mPort = new PortReservationRule(); + + protected final MasterRegistry mRegistry = new MasterRegistry(); + protected InetSocketAddress mRpcAddress; + protected AlluxioMasterProcess mMasterProcess; + + @Before + public void setUp() { + mRpcAddress = new InetSocketAddress(mPort.getPort()); + mMasterProcess = PowerMockito.mock(AlluxioMasterProcess.class); + Mockito.when(mMasterProcess.createBaseRpcServer()).thenAnswer(mock -> + GrpcServerBuilder.forAddress(GrpcServerAddress.create(mRpcAddress.getHostName(), + mRpcAddress), Configuration.global())); + Mockito.when(mMasterProcess.createRpcExecutorService()).thenReturn(Optional.empty()); + Mockito.when(mMasterProcess.getSafeModeManager()).thenReturn(Optional.empty()); + } + + protected boolean isGrpcBound() { + return isBound(mRpcAddress); + } + + protected boolean isBound(InetSocketAddress address) { + try (Socket socket = new Socket(address.getAddress(), address.getPort())) { + return true; + } catch (ConnectException e) { + return false; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + protected boolean waitForFree() { + try { + CommonUtils.waitFor("wait for socket to be free", () -> !isGrpcBound(), + 
WaitForOptions.defaults().setTimeoutMs(1_000).setInterval(10)); + return true; + } catch (Exception e) { + return false; + } + } +} From 6fc070180928e1696722b50ced7b9ed19f886752 Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 18 Jan 2023 00:23:44 +0800 Subject: [PATCH 066/334] Split BlockStoreEventListener.onCommitBlock() ### What changes are proposed in this pull request? `listener.onCommitBlock()` cannot reflect the state difference between commitBlockToLocal and commitBlockToMaster, so sometimes when commitBlockToMaster failed the `listener.onCommitBlock()` will still be called. This PR separated the `listener.onCommitBlock()` to `onCommitBlockToLocal()` and `onCommitBlockToMaster()`, and added corresponding UT to `PagedBlockStore` and `MonoBlockStore`. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. `listener.onCommitBlock()` cannot reflect the state difference between commitBlockToLocal and commitBlockToMaster, so sometimes when commitBlockToMaster failed the `listener.onCommitBlock()` will still be called. The separation of the `listener.onCommitBlock()` can eliminate this inconsistency. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including No user-facing changes previous conversation is [here](https://github.com/voddle/alluxio/pull/1) since I selected the incorrect base repo the first time I created this PR... 
pr-link: Alluxio/alluxio#16777 change-id: cid-9c367b0cab5dd5347eda873bc3c8ba189626cc75 --- .../java/alluxio/worker/block/BlockStore.java | 2 +- .../worker/block/BlockStoreEventListener.java | 11 +- .../AbstractBlockStoreEventListener.java | 5 +- .../alluxio/worker/block/MonoBlockStore.java | 12 + .../worker/block/TieredBlockStore.java | 7 +- .../block/annotator/DefaultBlockIterator.java | 2 +- .../worker/block/evictor/LRUEvictor.java | 2 +- .../alluxio/worker/page/PagedBlockStore.java | 27 +- .../block/MonoBlockStoreCommitBlockTest.java | 157 ++++++++++++ .../block/TieredBlockStoreTestUtils.java | 4 +- .../page/PagedBlockStoreCommitBlockTest.java | 235 ++++++++++++++++++ 11 files changed, 446 insertions(+), 18 deletions(-) create mode 100644 core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java create mode 100644 core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java diff --git a/core/common/src/main/java/alluxio/worker/block/BlockStore.java b/core/common/src/main/java/alluxio/worker/block/BlockStore.java index c5329e717610..5e84e9a85946 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockStore.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockStore.java @@ -97,7 +97,7 @@ BlockReader createBlockReader(long sessionId, long blockId, long offset, /** * Creates a block reader to read a UFS block starting from given block offset. - * Owner of this block reader must close it to cleanup state. + * Owner of this block reader must close it to clean up state. 
* * @param sessionId the client session ID * @param blockId the ID of the UFS block to read diff --git a/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java b/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java index d718c004e5a2..eca88c69191f 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockStoreEventListener.java @@ -42,11 +42,18 @@ public interface BlockStoreEventListener { void onAbortBlock(long blockId); /** - * Actions when committing a temporary block to a {@link BlockStoreLocation}. + * Actions when committing a temporary block to a {@link BlockStoreLocation} at local block store. * @param blockId the id of the block to commit * @param location the location of the block to be committed */ - void onCommitBlock(long blockId, BlockStoreLocation location); + void onCommitBlockToLocal(long blockId, BlockStoreLocation location); + + /** + * Actions when a temporary block has been committed to the alluxio master. + * @param blockId the id of the block to commit + * @param location the location of the block to be committed + */ + void onCommitBlockToMaster(long blockId, BlockStoreLocation location); /** * Actions when moving a block by a client from a {@link BlockStoreLocation} to another. 
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java b/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java index dc99e9406b7d..b35475bc06c7 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/AbstractBlockStoreEventListener.java @@ -30,7 +30,10 @@ public void onAccessBlock(long blockId, BlockStoreLocation location) {} public void onAbortBlock(long blockId) {} @Override - public void onCommitBlock(long blockId, BlockStoreLocation location) {} + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) {} + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) {} @Override public void onMoveBlockByClient(long blockId, BlockStoreLocation oldLocation, diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 28bb9abd6dee..546591f2b2a5 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -53,6 +53,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -70,6 +71,10 @@ public class MonoBlockStore implements BlockStore { private final UnderFileSystemBlockStore mUnderFileSystemBlockStore; private final BlockMasterClientPool mBlockMasterClientPool; private final AtomicReference mWorkerId; + + private final List mBlockStoreEventListeners = + new CopyOnWriteArrayList<>(); + private final ScheduledExecutorService mDelayer = new ScheduledThreadPoolExecutor(1, 
ThreadFactoryUtils.build("LoadTimeOut", true)); @@ -118,6 +123,11 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { blockMasterClient.commitBlock(mWorkerId.get(), mLocalBlockStore.getBlockStoreMeta().getUsedBytesOnTiers().get(loc.tierAlias()), loc.tierAlias(), loc.mediumType(), blockId, meta.getBlockSize()); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToMaster(blockId, loc); + } + } } catch (AlluxioStatusException e) { throw AlluxioRuntimeException.from(e); } finally { @@ -266,6 +276,8 @@ public void updatePinnedInodes(Set inodes) { @Override public void registerBlockStoreEventListener(BlockStoreEventListener listener) { + LOG.debug("registerBlockStoreEventListener: listener={}", listener); + mBlockStoreEventListeners.add(listener); mLocalBlockStore.registerBlockStoreEventListener(listener); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java index 099e92ca03dc..d4c0d3886bb9 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java @@ -233,7 +233,7 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { BlockStoreLocation loc = commitBlockInternal(sessionId, blockId, pinOnCreate); for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { - listener.onCommitBlock(blockId, loc); + listener.onCommitBlockToLocal(blockId, loc); } } } @@ -253,7 +253,7 @@ public BlockLock commitBlockLocked(long sessionId, long blockId, boolean pinOnCr } for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { - listener.onCommitBlock(blockId, loc); + listener.onCommitBlockToLocal(blockId, loc); } } return lock; @@ -498,7 +498,8 @@ private void 
abortBlockInternal(long sessionId, long blockId) { * @param pinOnCreate is block pinned on create * @return destination location to move the block */ - private BlockStoreLocation commitBlockInternal(long sessionId, long blockId, + @VisibleForTesting + BlockStoreLocation commitBlockInternal(long sessionId, long blockId, boolean pinOnCreate) { if (mMetaManager.hasBlockMeta(blockId)) { LOG.debug("Block {} has been in block store, this could be a retry due to master-side RPC " diff --git a/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java b/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java index 00f521fbc660..a0ffa791cdfb 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/annotator/DefaultBlockIterator.java @@ -386,7 +386,7 @@ public void onAccessBlock(long blockId, BlockStoreLocation location) { } @Override - public void onCommitBlock(long blockId, BlockStoreLocation location) { + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { blockUpdated(blockId, location); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java b/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java index 6074e4358f4f..172f424a3958 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/evictor/LRUEvictor.java @@ -82,7 +82,7 @@ public void onAccessBlock(long blockId) { } @Override - public void onCommitBlock(long blockId, BlockStoreLocation location) { + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { // Since the temp block has been committed, update Evictor about the new added blocks mLRUCache.put(blockId, UNUSED_MAP_VALUE); } diff --git 
a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index 16d9daacfdb5..a3be0892c6ad 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -166,7 +166,19 @@ public void commitBlock(long sessionId, long blockId, boolean pinOnCreate) { pageStoreDir.commit(BlockPageId.tempFileIdOf(blockId), BlockPageId.fileIdOf(blockId, blockMeta.getBlockSize())); final PagedBlockMeta committed = mPageMetaStore.commit(blockId); + BlockStoreLocation blockLocation = + new BlockStoreLocation(DEFAULT_TIER, getDirIndexOfBlock(blockId)); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToLocal(blockId, blockLocation); + } + } commitBlockToMaster(committed); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onCommitBlockToMaster(blockId, blockLocation); + } + } } catch (IOException e) { throw AlluxioRuntimeException.from(e); } finally { @@ -195,13 +207,6 @@ private void commitBlockToMaster(PagedBlockMeta blockMeta) { } finally { mBlockMasterClientPool.release(bmc); } - BlockStoreLocation blockLocation = - new BlockStoreLocation(DEFAULT_TIER, getDirIndexOfBlock(blockId)); - for (BlockStoreEventListener listener : mBlockStoreEventListeners) { - synchronized (listener) { - listener.onCommitBlock(blockId, blockLocation); - } - } } @Override @@ -365,6 +370,14 @@ public BlockWriter createBlockWriter(long sessionId, long blockId) String.format("Cannot overwrite an existing block %d", blockId))); } + /** + * Return mCacheManager.mState.get() for CommitTest. 
+ * @return the mState, like READ_ONLY, READ_WRITE, NOT_IN_USE + */ + public CacheManager.State getCacheManagerState() { + return mCacheManager.state(); + } + @Override public void moveBlock(long sessionId, long blockId, AllocateOptions moveOptions) throws IOException { diff --git a/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java b/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java new file mode 100644 index 000000000000..96eec7641404 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/MonoBlockStoreCommitBlockTest.java @@ -0,0 +1,157 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyBoolean; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.exception.status.AlluxioStatusException; +import alluxio.underfs.UfsManager; +import alluxio.worker.block.io.BlockWriter; +import alluxio.worker.block.meta.StorageDir; + +import io.grpc.Status; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicReference; + +// This Test is a little different from the PagedBlockStoreCommitStore due to structure different. 
+// MonoBlockStore.commitBlock() will call TieredBlockStore.commitBlocked() first as commitLocal, +// then will call BlockMasterClient.commitBlock() as commitMaster +// TieredBlockStore.commitBlock() call TieredBLockStore.commitBlockInternal inside them wake the +// EventListener for listener.onCommitToLocal() +// MonoBlockStore will wake the EventListener for listener.onCommitToMaster after +// BlockMasterClient.commitBlock() successes +// In a nutshell two onCommit events weren't called in same domain +public class MonoBlockStoreCommitBlockTest { + public MonoBlockStore mMonoBlockStore; + BlockMasterClientPool mMockedBlockMasterClientPool; + BlockMasterClient mMockedBlockMasterClient; + BlockMetadataManager mBlockMetadataManager; + BlockLockManager mBlockLockManager; + TieredBlockStore mTieredBlockStore; + private static final String FIRST_TIER_ALIAS = TieredBlockStoreTestUtils.TIER_ALIAS[0]; + private StorageDir mTestDir1; + /** Rule to create a new temporary folder during each test. */ + @Rule + public TemporaryFolder mTestFolder = new TemporaryFolder(); + + private static final Long SESSION_ID = 1L; + private static final long BLOCK_ID = 2L; + // Maybe location should be asserted as well. 
+ BlockStoreEventListener mListener; + + @Before + public void setup() throws Exception { + File tempFolder = mTestFolder.newFolder(); + TieredBlockStoreTestUtils.setupDefaultConf(tempFolder.getAbsolutePath()); + + mMockedBlockMasterClientPool = mock(BlockMasterClientPool.class); + mMockedBlockMasterClient = mock(BlockMasterClient.class); + when(mMockedBlockMasterClientPool.acquire()).thenReturn(mMockedBlockMasterClient); + doNothing().when(mMockedBlockMasterClientPool).release(any()); + mBlockLockManager = new BlockLockManager(); + mBlockMetadataManager = BlockMetadataManager.createBlockMetadataManager(); + + mTestDir1 = mBlockMetadataManager.getTier(FIRST_TIER_ALIAS).getDir(0); + + mListener = spy(new AbstractBlockStoreEventListener() { + @Override + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + }); + } + + @Test + public void commitLocalandCommitMasterBothSuccess() throws Exception { + mTieredBlockStore = new TieredBlockStore(mBlockMetadataManager, mBlockLockManager); + + prepareBlockStore(); + + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + @Test + public void commitLocalSuccessandCommitMasterFail() throws Exception { + doAnswer((i) -> { + throw new AlluxioStatusException(Status.UNAVAILABLE); + }).when(mMockedBlockMasterClient).commitBlock(anyLong(), anyLong(), anyString(), + anyString(), anyLong(), anyLong()); + mTieredBlockStore = new TieredBlockStore(mBlockMetadataManager, mBlockLockManager); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + 
verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + @Test + public void commitLocalFailandCommitMasterSuccess() throws Exception { + mTieredBlockStore = spy(new TieredBlockStore(mBlockMetadataManager, mBlockLockManager)); + doAnswer((i) -> { + throw new RuntimeException(); + }).when(mTieredBlockStore).commitBlockInternal(anyLong(), anyLong(), anyBoolean()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mMonoBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + verify(mListener, never()).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + public void prepareBlockStore() throws Exception { + mMonoBlockStore = new MonoBlockStore(mTieredBlockStore, mMockedBlockMasterClientPool, + mock(UfsManager.class), new AtomicReference<>(1L)); + + TieredBlockStoreTestUtils.createTempBlock(SESSION_ID, BLOCK_ID, 64, mTestDir1); + + byte[] data = new byte[64]; + Arrays.fill(data, (byte) 1); + ByteBuffer buf = ByteBuffer.wrap(data); + BlockWriter writer = mMonoBlockStore.createBlockWriter(SESSION_ID, BLOCK_ID); + writer.append(buf); + mMonoBlockStore.registerBlockStoreEventListener(mListener); + } +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java b/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java index 15ef31e70f3b..ff8b8939742b 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/TieredBlockStoreTestUtils.java @@ -335,7 +335,7 @@ public static void cache2(long sessionId, long blockId, long bytes, StorageDir d cache2(sessionId, blockId, bytes, dir, meta, (BlockStoreEventListener) null); if (iterator != null) { for 
(BlockStoreEventListener listener : iterator.getListeners()) { - listener.onCommitBlock(blockId, dir.toBlockStoreLocation()); + listener.onCommitBlockToLocal(blockId, dir.toBlockStoreLocation()); } } } @@ -360,7 +360,7 @@ public static void cache2(long sessionId, long blockId, long bytes, StorageDir d // update iterator if a listener. if (listener != null) { - listener.onCommitBlock(blockId, dir.toBlockStoreLocation()); + listener.onCommitBlockToLocal(blockId, dir.toBlockStoreLocation()); } } diff --git a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java new file mode 100644 index 000000000000..531979303d0f --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockStoreCommitBlockTest.java @@ -0,0 +1,235 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.page; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.Constants; +import alluxio.client.file.cache.CacheManager; +import alluxio.client.file.cache.CacheManagerOptions; +import alluxio.client.file.cache.evictor.CacheEvictorOptions; +import alluxio.client.file.cache.evictor.FIFOCacheEvictor; +import alluxio.client.file.cache.store.PageStoreDir; +import alluxio.client.file.cache.store.PageStoreOptions; +import alluxio.client.file.cache.store.PageStoreType; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.exception.status.AlluxioStatusException; +import alluxio.master.NoopUfsManager; +import alluxio.underfs.UfsManager; +import alluxio.util.CommonUtils; +import alluxio.worker.block.AbstractBlockStoreEventListener; +import alluxio.worker.block.BlockMasterClient; +import alluxio.worker.block.BlockMasterClientPool; +import alluxio.worker.block.BlockStoreEventListener; +import alluxio.worker.block.BlockStoreLocation; +import alluxio.worker.block.CreateBlockOptions; +import alluxio.worker.block.io.BlockWriter; + +import com.google.common.collect.ImmutableList; +import io.grpc.Status; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.util.ArrayList; 
+import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicReference; + +public class PagedBlockStoreCommitBlockTest { + BlockStoreEventListener mListener; + UfsManager mUfs; + AlluxioConfiguration mConf; + CacheManagerOptions mCacheManagerOptions; + PagedBlockMetaStore mPageMetaStore; + List mDirs; + PagedBlockStore mPagedBlockStore; + BlockMasterClientPool mBlockMasterClientPool; + BlockMasterClient mMockedBlockMasterClient; + AtomicReference mWorkerId; + + CacheManager mCacheManager; + + private static final int DIR_INDEX = 0; + + private static final Long SESSION_ID = 1L; + private static final long BLOCK_ID = 2L; + final int mBlockSize = 64; + + public int mPageSize = 2; + + private static final int OFFSET = 0; + + @Rule + public TemporaryFolder mTempFolder = new TemporaryFolder(); + + @Before + public void setup() throws Exception { + List pageStoreDirs; + InstancedConfiguration cacheManagerConf = Configuration.copyGlobal(); + + Path dirPath = mTempFolder.newFolder().toPath(); + InstancedConfiguration dirConf = Configuration.modifiableGlobal(); + dirConf.set(PropertyKey.WORKER_PAGE_STORE_DIRS, ImmutableList.of(dirPath)); + dirConf.set(PropertyKey.WORKER_PAGE_STORE_SIZES, ImmutableList.of(Constants.MB)); + dirConf.set(PropertyKey.WORKER_PAGE_STORE_TYPE, PageStoreType.LOCAL); + PageStoreDir pageStoreDir = + PageStoreDir.createPageStoreDir( + new CacheEvictorOptions().setEvictorClass(FIFOCacheEvictor.class), + PageStoreOptions.createForWorkerPageStore(dirConf).get(DIR_INDEX)); + + mUfs = new NoopUfsManager(); + mConf = Configuration.global(); + cacheManagerConf.set(PropertyKey.WORKER_PAGE_STORE_PAGE_SIZE, mPageSize); + cacheManagerConf.set(PropertyKey.WORKER_PAGE_STORE_DIRS, ImmutableList.of(dirPath)); + + // Here mock BlockMasterClientPool and BlockMasterClient since I have no idea + // about how to override them. 
+ // mockedPool will return a mocked BlockMasterClient when require() is called, + // and do nothing when releasing, maybe add some action later on. + mBlockMasterClientPool = mock(BlockMasterClientPool.class); + mMockedBlockMasterClient = mock(BlockMasterClient.class); + when(mBlockMasterClientPool.acquire()).thenReturn(mMockedBlockMasterClient); + doNothing().when(mBlockMasterClientPool).release(any()); + mWorkerId = new AtomicReference<>(-1L); + mCacheManagerOptions = CacheManagerOptions.createForWorker(cacheManagerConf); + pageStoreDirs = new ArrayList(); + pageStoreDirs.add(pageStoreDir); + mDirs = PagedBlockStoreDir.fromPageStoreDirs(pageStoreDirs); + + mListener = spy(new AbstractBlockStoreEventListener() { + @Override + public void onCommitBlockToLocal(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + + @Override + public void onCommitBlockToMaster(long blockId, BlockStoreLocation location) { + assertEquals(BLOCK_ID, blockId); + } + }); + } + + @After + public void tearDown() throws IOException { + mPagedBlockStore.close(); + } + + // This Test case success both to commit, no Exception should be thrown, + // and both onCommit method should be called + @Test + public void localCommitAndMasterCommitBothSuccess() + throws IOException, InterruptedException, TimeoutException { + mPageMetaStore = new PagedBlockMetaStore(mDirs); + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, mWorkerId, + mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // This Test case success commitToMaster, expecting one exception, + + @Test + public void 
localCommitFailAndMasterCommitSuccess() + throws IOException, InterruptedException, TimeoutException { + mPageMetaStore = new PagedBlockMetaStore(mDirs) { + // here commit always throw Exception + @Override + public PagedBlockMeta commit(long BLOCK_ID) { + throw new RuntimeException(); + } + }; + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, + mWorkerId, mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + + verify(mListener, never()).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // This Test case success commitToLocal, expecting one exception, + // and only one onCommit method should be called. + @Test + public void localCommitSuccessAndMasterCommitFail() + throws IOException, InterruptedException, TimeoutException { + doAnswer((i) -> { + throw new AlluxioStatusException(Status.UNAVAILABLE); + }).when(mMockedBlockMasterClient).commitBlock(anyLong(), anyLong(), anyString(), + anyString(), anyLong(), anyLong()); + mPageMetaStore = new PagedBlockMetaStore(mDirs); + mCacheManager = CacheManager.Factory.create(mConf, mCacheManagerOptions, mPageMetaStore); + + mPagedBlockStore = new PagedBlockStore(mCacheManager, mUfs, mBlockMasterClientPool, mWorkerId, + mPageMetaStore, mCacheManagerOptions.getPageSize()); + + prepareBlockStore(); + + assertThrows(RuntimeException.class, () -> { + mPagedBlockStore.commitBlock(SESSION_ID, BLOCK_ID, false); + }); + verify(mListener).onCommitBlockToLocal(anyLong(), any(BlockStoreLocation.class)); + verify(mListener, never()).onCommitBlockToMaster(anyLong(), any(BlockStoreLocation.class)); + } + + // Prepare PageBlockStore and creat a temp block for 
following test + public void prepareBlockStore() throws IOException, InterruptedException, TimeoutException { + PagedBlockStoreDir dir = + (PagedBlockStoreDir) mPageMetaStore.allocate(BlockPageId.tempFileIdOf(BLOCK_ID), 1); + + dir.putTempFile(BlockPageId.tempFileIdOf(BLOCK_ID)); + PagedTempBlockMeta blockMeta = new PagedTempBlockMeta(BLOCK_ID, dir); + mPagedBlockStore.createBlock(SESSION_ID, BLOCK_ID, OFFSET, + new CreateBlockOptions(null, null, mBlockSize)); + byte[] data = new byte[mBlockSize]; + Arrays.fill(data, (byte) 1); + ByteBuffer buf = ByteBuffer.wrap(data); + BlockWriter writer = mPagedBlockStore.createBlockWriter(SESSION_ID, BLOCK_ID); + CommonUtils.waitFor("writer initiation complete", + () -> mPagedBlockStore.getCacheManagerState() == CacheManager.State.READ_WRITE); + writer.append(buf); + + mPagedBlockStore.registerBlockStoreEventListener(mListener); + } +} From e39fa3ba61cdfa6b08dc27f153db1110c46cd7d2 Mon Sep 17 00:00:00 2001 From: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> Date: Tue, 17 Jan 2023 09:31:33 -0800 Subject: [PATCH 067/334] Refactor Microbench options Refactor Microbenchmark `@Param` options to focus only on the most relevant benchmarks. 
pr-link: Alluxio/alluxio#16781 change-id: cid-e0dd1ffe3bbdf5d83ad40b65592cd33bb7fdad39 --- .../main/java/alluxio/BaseFileStructure.java | 38 +++++-------------- .../fsmaster/FileSystemMasterBench.java | 13 +++++++ .../java/alluxio/inode/InodeBenchRead.java | 20 +++++++--- .../java/alluxio/inode/InodeBenchWrite.java | 19 ++++++---- .../java/alluxio/inode/RocksBenchRead.java | 19 +++++++--- .../alluxio/inode/RocksBenchReadWrite.java | 21 +++++++--- .../java/alluxio/inode/RocksBenchWrite.java | 5 +-- .../cache/InvalidationSyncCacheBench.java | 23 ++++++----- 8 files changed, 92 insertions(+), 66 deletions(-) diff --git a/microbench/src/main/java/alluxio/BaseFileStructure.java b/microbench/src/main/java/alluxio/BaseFileStructure.java index e30f12a1777e..9705607bf792 100644 --- a/microbench/src/main/java/alluxio/BaseFileStructure.java +++ b/microbench/src/main/java/alluxio/BaseFileStructure.java @@ -11,10 +11,7 @@ package alluxio; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import site.ycsb.generator.NumberGenerator; import site.ycsb.generator.UniformLongGenerator; @@ -27,20 +24,6 @@ */ @State(Scope.Benchmark) public class BaseFileStructure { - - @Param({"0", "1", "10"}) - public int mDepth; - - @Param({"0"}) - public int mWidth; - - @Param({"0", "10", "100", "1000"}) - public int mFileCount; - - // is used in read benchmark to simulate different file access patterns - @Param({"UNIFORM", "ZIPF"}) - public Distribution mDistribution; - // each depth level needs its own file id generator public ArrayList mFileGenerators; public NumberGenerator mDepthGenerator; @@ -48,22 +31,21 @@ public class BaseFileStructure { public enum Distribution { UNIFORM, ZIPF } - @Setup(Level.Trial) - public void init() { + public void init(int depth, int width, int fileCount, Distribution distribution) { mFileGenerators = new 
ArrayList<>(); - switch (mDistribution) { + switch (distribution) { case ZIPF: - mDepthGenerator = new ZipfianGenerator(0, mDepth); - mWidthGenerator = new ZipfianGenerator(0, mWidth); - for (int i = 0; i < mDepth + 1; i++) { - mFileGenerators.add(new ZipfianGenerator(0, mFileCount - 1)); + mDepthGenerator = new ZipfianGenerator(0, depth); + mWidthGenerator = new ZipfianGenerator(0, width); + for (int i = 0; i < depth + 1; i++) { + mFileGenerators.add(new ZipfianGenerator(0, fileCount - 1)); } break; default: - mDepthGenerator = new UniformLongGenerator(0, mDepth); - mWidthGenerator = new UniformLongGenerator(0, mWidth); - for (int i = 0; i < mDepth + 1; i++) { - mFileGenerators.add(new UniformLongGenerator(0, mFileCount - 1)); + mDepthGenerator = new UniformLongGenerator(0, depth); + mWidthGenerator = new UniformLongGenerator(0, width); + for (int i = 0; i < depth + 1; i++) { + mFileGenerators.add(new UniformLongGenerator(0, fileCount - 1)); } } } diff --git a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBench.java b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBench.java index a9003d9dd9df..d4a0767c8f6e 100644 --- a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBench.java +++ b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBench.java @@ -21,6 +21,7 @@ import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -55,10 +56,22 @@ public void setup() { @State(Scope.Benchmark) public static class FileSystem extends BaseFileStructure { + @Param({"10"}) + public int mDepth; + + @Param({"0"}) + public int mWidth; + + @Param({"1000"}) + public int mFileCount; + + @Param({"ZIPF"}) + public Distribution mDistribution; FileSystemMasterBase mBase = new FileSystemMasterBase(); 
@Setup(Level.Trial) public void setup() throws Exception { + super.init(mDepth, mWidth, mFileCount, mDistribution); mBase.init(); mBase.createPathDepths(mDepth); for (int d = 0; d < mDepth + 1; d++) { diff --git a/microbench/src/main/java/alluxio/inode/InodeBenchRead.java b/microbench/src/main/java/alluxio/inode/InodeBenchRead.java index 1c06b17d5e51..eb681026f951 100644 --- a/microbench/src/main/java/alluxio/inode/InodeBenchRead.java +++ b/microbench/src/main/java/alluxio/inode/InodeBenchRead.java @@ -11,8 +11,6 @@ package alluxio.inode; -import static alluxio.inode.InodeBenchBase.HEAP; -import static alluxio.inode.InodeBenchBase.ROCKS; import static alluxio.inode.InodeBenchBase.ROCKSCACHE; import alluxio.BaseFileStructure; @@ -56,23 +54,35 @@ public static class ThreadState extends BaseThreadState { } @State(Scope.Benchmark) public static class Db extends BaseFileStructure { + @Param({"10"}) + public int mDepth; + + @Param({"0"}) + public int mWidth; + + @Param({"1000"}) + public int mFileCount; + + @Param({"ZIPF"}) + public Distribution mDistribution; @Param({"true", "false"}) public boolean mSingleFile; - @Param({HEAP, ROCKS, ROCKSCACHE}) + @Param({ROCKSCACHE}) public String mType; - @Param({RocksBenchConfig.JAVA_CONFIG, RocksBenchConfig.BASE_CONFIG, - RocksBenchConfig.EMPTY_CONFIG, RocksBenchConfig.BLOOM_CONFIG}) + @Param({RocksBenchConfig.JAVA_CONFIG}) public String mRocksConfig; InodeBenchBase mBase; @Setup(Level.Trial) public void setup() throws Exception { + super.init(mDepth, mWidth, mFileCount, mDistribution); Assert.assertTrue("mFileCount needs to be > 0 if mSingleFile is true", !mSingleFile || mFileCount > 0); + mBase = new InodeBenchBase(mType, mRocksConfig); mBase.createBasePath(mDepth); for (int d = 0; d <= mDepth; d++) { diff --git a/microbench/src/main/java/alluxio/inode/InodeBenchWrite.java b/microbench/src/main/java/alluxio/inode/InodeBenchWrite.java index d3d07241d90e..15a85885d2a3 100644 --- 
a/microbench/src/main/java/alluxio/inode/InodeBenchWrite.java +++ b/microbench/src/main/java/alluxio/inode/InodeBenchWrite.java @@ -11,14 +11,11 @@ package alluxio.inode; -import static alluxio.inode.InodeBenchBase.HEAP; -import static alluxio.inode.InodeBenchBase.ROCKS; import static alluxio.inode.InodeBenchBase.ROCKSCACHE; import alluxio.BaseFileStructure; import alluxio.BaseThreadState; -import org.junit.Assert; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Param; @@ -63,18 +60,26 @@ public void after() { @State(Scope.Benchmark) public static class Db extends BaseFileStructure { - @Param({HEAP, ROCKS, ROCKSCACHE}) + @Param({"10"}) + public int mDepth; + + @Param({"0"}) + public int mWidth; + + // is used in read benchmark to simulate different file access patterns + @Param({"ZIPF"}) + public Distribution mDistribution; + @Param({ROCKSCACHE}) public String mType; - @Param({RocksBenchConfig.JAVA_CONFIG, RocksBenchConfig.BASE_CONFIG, - RocksBenchConfig.EMPTY_CONFIG, RocksBenchConfig.BLOOM_CONFIG}) + @Param({RocksBenchConfig.JAVA_CONFIG}) public String mRocksConfig; InodeBenchBase mBase; @Setup(Level.Iteration) public void setup() throws Exception { - Assert.assertEquals("mFileCount is not used in this benchmark", 0, mFileCount); + super.init(mDepth, mWidth, 0, mDistribution); mBase = new InodeBenchBase(mType, mRocksConfig); mBase.createBasePath(mDepth); } diff --git a/microbench/src/main/java/alluxio/inode/RocksBenchRead.java b/microbench/src/main/java/alluxio/inode/RocksBenchRead.java index a913a50c8e96..d8d7c53f7b3b 100644 --- a/microbench/src/main/java/alluxio/inode/RocksBenchRead.java +++ b/microbench/src/main/java/alluxio/inode/RocksBenchRead.java @@ -63,22 +63,31 @@ public void setup() { @State(Scope.Benchmark) public static class Db extends BaseFileStructure { - @Param({SER_READ, NO_SER_READ, SER_NO_ALLOC_READ, NO_SER_NO_ALLOC_READ}) + @Param({"0"}) + public int mWidth; + + 
@Param({"1000"}) + public int mFileCount; + + // is used in read benchmark to simulate different file access patterns + @Param({"ZIPF"}) + public Distribution mDistribution; + @Param({SER_READ}) public String mReadType; - @Param({"true", "false"}) + @Param({"false"}) public boolean mIsDirectory; - @Param({RocksBenchConfig.JAVA_CONFIG, RocksBenchConfig.BASE_CONFIG, - RocksBenchConfig.EMPTY_CONFIG, RocksBenchConfig.BLOOM_CONFIG}) + @Param({RocksBenchConfig.JAVA_CONFIG}) public String mRocksConfig; RocksBenchBase mBase; @Setup(Level.Trial) public void setup() throws IOException { - Assert.assertEquals("mDepth is not used in this benchmark", 0, mDepth); Assert.assertTrue("mFileCount must be > 0", mFileCount > 0); + super.init(0, mWidth, mFileCount, mDistribution); + MutableInode inode = genInode(mIsDirectory); mBase = new RocksBenchBase(mRocksConfig); for (long i = 0; i < mFileCount; i++) { diff --git a/microbench/src/main/java/alluxio/inode/RocksBenchReadWrite.java b/microbench/src/main/java/alluxio/inode/RocksBenchReadWrite.java index 186c41e3185c..6982f0d9c814 100644 --- a/microbench/src/main/java/alluxio/inode/RocksBenchReadWrite.java +++ b/microbench/src/main/java/alluxio/inode/RocksBenchReadWrite.java @@ -142,20 +142,28 @@ public void after() { @State(Scope.Benchmark) public static class Db extends BaseFileStructure { - @Param({SER_READ, NO_SER_READ, SER_NO_ALLOC_READ, NO_SER_NO_ALLOC_READ}) + @Param({"0"}) + public int mWidth; + + @Param({"1000"}) + public int mFileCount; + + // is used in read benchmark to simulate different file access patterns + @Param({"ZIPF"}) + public Distribution mDistribution; + @Param({SER_READ}) public String mReadType; - @Param({"false", "true"}) + @Param({"true"}) public boolean mWriteSerialization; @Param({"true", "false"}) public boolean mIsDirectory; - @Param({"1", "10"}) + @Param({"20"}) public int mWritePercentage; - @Param({RocksBenchConfig.JAVA_CONFIG, RocksBenchConfig.BASE_CONFIG, - RocksBenchConfig.EMPTY_CONFIG, 
RocksBenchConfig.BLOOM_CONFIG}) + @Param({RocksBenchConfig.JAVA_CONFIG}) public String mRocksConfig; RocksBenchBase mBase; @@ -164,8 +172,9 @@ public static class Db extends BaseFileStructure { public void setup() throws IOException { Preconditions.checkState(mWritePercentage >= 0 && mWritePercentage <= 100, "write percentage must be between 0 and 100"); - Assert.assertEquals("mDepth is not used in this benchmark", 0, mDepth); Assert.assertTrue("mFileCount has to be greater than 0", 0 < mFileCount); + super.init(0, mWidth, mFileCount, mDistribution); + mBase = new RocksBenchBase(mRocksConfig); } diff --git a/microbench/src/main/java/alluxio/inode/RocksBenchWrite.java b/microbench/src/main/java/alluxio/inode/RocksBenchWrite.java index 07e1ad9360e5..1260a892dd27 100644 --- a/microbench/src/main/java/alluxio/inode/RocksBenchWrite.java +++ b/microbench/src/main/java/alluxio/inode/RocksBenchWrite.java @@ -68,11 +68,10 @@ public void after() { @State(Scope.Benchmark) public static class Db { - @Param({"true", "false"}) + @Param({"true"}) public boolean mUseSerialization; - @Param({RocksBenchConfig.JAVA_CONFIG, RocksBenchConfig.BASE_CONFIG, - RocksBenchConfig.EMPTY_CONFIG, RocksBenchConfig.BLOOM_CONFIG}) + @Param({RocksBenchConfig.JAVA_CONFIG}) public String mRocksConfig; RocksBenchBase mBase; diff --git a/microbench/src/main/java/alluxio/invalidation/cache/InvalidationSyncCacheBench.java b/microbench/src/main/java/alluxio/invalidation/cache/InvalidationSyncCacheBench.java index 15fb7c9f57b3..4e1320614487 100644 --- a/microbench/src/main/java/alluxio/invalidation/cache/InvalidationSyncCacheBench.java +++ b/microbench/src/main/java/alluxio/invalidation/cache/InvalidationSyncCacheBench.java @@ -97,16 +97,22 @@ AlluxioURI nextPath(BaseFileStructure fs, boolean isDirectory) { public static class FileStructure extends BaseFileStructure { UfsSyncPathCache mCache; - @Param({"100", "1000"}) + @Param({"10"}) + public int mDepth; + + @Param({"0"}) + public int mWidth; + + 
@Param({"1000"}) public int mCacheSize; - @Param({"70", "80"}) + @Param({"70"}) public int mCheckSync; @Param({"5"}) public int mDirSync; - @Param({"UNIFORM", "ZIPF"}) + @Param({"ZIPF"}) public Distribution mInvalDist; @Param({"1000"}) @@ -119,21 +125,14 @@ public void setupTrial() throws Exception { Configuration.set(PropertyKey.MASTER_UFS_PATH_CACHE_CAPACITY, mCacheSize); mInvalidationStructure = new BaseFileStructure(); - mInvalidationStructure.mDistribution = mInvalDist; - mInvalidationStructure.mFileCount = mInvalCount; - mInvalidationStructure.mDepth = mDepth; - mInvalidationStructure.init(); + mInvalidationStructure.init(mDepth, 0, mInvalCount, mInvalDist); mCache = new UfsSyncPathCache(new AtomicClock()); mCache.notifySyncedPath(new AlluxioURI("/"), DescendantType.ALL, mCache.recordStartSync(), null, false); // first approximately fill the cache BaseFileStructure fs = new BaseFileStructure(); - fs.mDistribution = Distribution.UNIFORM; - fs.mFileCount = mInvalCount; - fs.mDepth = mDepth; - fs.mWidth = mWidth; - fs.init(); + fs.init(mDepth, mWidth, mInvalCount, Distribution.UNIFORM); ThreadState ts = new ThreadState(); int fillSize = Math.min(mCacheSize, 5000000); System.out.println("Filling cache with " + fillSize + " elements"); From 1261abb5a4350bbf88a76becaa717e081ab64fed Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Wed, 18 Jan 2023 03:30:25 +0800 Subject: [PATCH 068/334] Change matcher in CapacityCommandIntegrationTest ### What changes are proposed in this pull request? Change matcher in CapacityCommandIntegrationTest. 1. Match the output line by line, deprecate `assertThat()`. 2. Use regexp to match the table header. ### Why are the changes needed? 1. `Assert.assertThat()` is deprecated. 2. We got following errors in our internal CI, because the worker name is too long. 
``` [ERROR] Failures: [ERROR] alluxio.client.cli.fsadmin.command.CapacityCommandIntegrationTest.allCapacity [ERROR] Run 1: CapacityCommandIntegrationTest.allCapacity:40 Expected: a string containing "Worker Name Last Heartbeat Storage MEM Version Revision" but: was "Capacity information for all workers: Total Capacity: 16.00MB Tier: MEM Size: 16.00MB Used Capacity: 0B Tier: MEM Size: 0B Used Percentage: 0% Free Percentage: 100% Worker Name Last Heartbeat Storage MEM Version Revision ci1672097607865c-0.ci1672097607865c.default.svc.cluster.local 0 capacity 16.00MB 2.10.0-SNAPSHOT c25544758d60f4 4c0eb9fa13fc20040a3ec2ae30 used 0B (0%) " ``` ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16778 change-id: cid-896b4dc4c71337aec11687a8bb95d0be60d1f365 --- .../CapacityCommandIntegrationTest.java | 54 ++++++++++--------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java index b591b1a928e9..bdbfb93c02d0 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java @@ -15,7 +15,6 @@ import alluxio.client.cli.fsadmin.AbstractFsAdminShellTest; import alluxio.util.FormatUtils; -import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; @@ -29,18 +28,20 @@ public void allCapacity() { Assert.assertEquals(0, ret); String output = mOutput.toString(); String size = FormatUtils.getSizeFromBytes(SIZE_BYTES); - Assert.assertThat(output, CoreMatchers.containsString("Capacity information for all workers: \n" - + " Total Capacity: " + size + "\n" - + " Tier: MEM Size: " + size + "\n" - + " Used Capacity: 0B\n" - + " Tier: MEM Size: 0B\n" - + " Used Percentage: 0%\n" - + " Free Percentage: 100%\n")); - // 
CHECKSTYLE.OFF: LineLengthExceed - Much more readable - Assert.assertThat(output, CoreMatchers.containsString( - "Worker Name Last Heartbeat Storage MEM Version Revision")); - Assert.assertThat(output, CoreMatchers.containsString( - " used 0B (0%)")); + String[] lines = output.split("\n"); + Assert.assertEquals(11, lines.length); + Assert.assertEquals("Capacity information for all workers: ", lines[0]); + Assert.assertEquals(" Total Capacity: " + size, lines[1]); + Assert.assertEquals(" Tier: MEM Size: " + size, lines[2]); + Assert.assertEquals(" Used Capacity: 0B", lines[3]); + Assert.assertEquals(" Tier: MEM Size: 0B", lines[4]); + Assert.assertEquals(" Used Percentage: 0%", lines[5]); + Assert.assertEquals(" Free Percentage: 100%", lines[6]); + Assert.assertEquals("", lines[7]); + Assert.assertTrue(lines[8].matches( + "Worker Name {6,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + Assert.assertTrue(lines[9].contains("capacity " + size)); + Assert.assertTrue(lines[10].contains("used 0B (0%)")); } @Test @@ -56,19 +57,20 @@ public void liveCapacity() { Assert.assertEquals(0, ret); String output = mOutput.toString(); String size = FormatUtils.getSizeFromBytes(SIZE_BYTES); - Assert.assertThat(output, CoreMatchers.containsString( - "Capacity information for live workers: \n" - + " Total Capacity: " + size + "\n" - + " Tier: MEM Size: " + size + "\n" - + " Used Capacity: 0B\n" - + " Tier: MEM Size: 0B\n" - + " Used Percentage: 0%\n" - + " Free Percentage: 100%\n")); - // CHECKSTYLE.OFF: LineLengthExceed - Much more readable - Assert.assertThat(output, CoreMatchers.containsString( - "Worker Name Last Heartbeat Storage MEM Version Revision")); - Assert.assertThat(output, CoreMatchers.containsString( - " used 0B (0%)")); + String[] lines = output.split("\n"); + Assert.assertEquals(11, lines.length); + Assert.assertEquals("Capacity information for live workers: ", lines[0]); + Assert.assertEquals(" Total Capacity: " + size, lines[1]); + 
Assert.assertEquals(" Tier: MEM Size: " + size, lines[2]); + Assert.assertEquals(" Used Capacity: 0B", lines[3]); + Assert.assertEquals(" Tier: MEM Size: 0B", lines[4]); + Assert.assertEquals(" Used Percentage: 0%", lines[5]); + Assert.assertEquals(" Free Percentage: 100%", lines[6]); + Assert.assertEquals("", lines[7]); + Assert.assertTrue(lines[8].matches( + "Worker Name {6,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + Assert.assertTrue(lines[9].contains("capacity " + size)); + Assert.assertTrue(lines[10].contains("used 0B (0%)")); } @Test From 45ec28e77e30d88035d6885e6a63deaf70199a51 Mon Sep 17 00:00:00 2001 From: bingzheng Date: Wed, 18 Jan 2023 03:47:51 +0800 Subject: [PATCH 069/334] [SMALLFIX] Import class type before code class ### What changes are proposed in this pull request? Import class type before code class ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 4. 
webui No pr-link: Alluxio/alluxio#16768 change-id: cid-d72e58a4052542891dbc13844c23bb110886dbdd --- .../src/main/java/alluxio/client/file/BaseFileSystem.java | 4 +++- .../java/alluxio/client/file/DelegatingFileSystem.java | 4 +++- .../fs/src/main/java/alluxio/client/file/FileSystem.java | 4 +++- .../java/alluxio/client/file/ufs/UfsBaseFileSystem.java | 7 +++++-- .../client/file/cache/LocalCacheFileInStreamTest.java | 4 +++- .../java/alluxio/fuse/auth/AbstractAuthPolicyTest.java | 3 ++- 6 files changed, 19 insertions(+), 7 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index 4d9602b3a556..773d05868560 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -48,6 +48,7 @@ import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPType; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -80,6 +81,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.function.Consumer; import javax.annotation.concurrent.ThreadSafe; @@ -535,7 +537,7 @@ public boolean stopLoad(AlluxioURI path) { @Override public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + Optional format, boolean verbose) { try (CloseableResource client = mFsContext.acquireMasterClientResource()) { return client.get().getLoadProgress(path, format, verbose); diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index d4030a674d9c..58ca4b70fa3f 100644 --- 
a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -29,6 +29,7 @@ import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -45,6 +46,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -254,7 +256,7 @@ public boolean stopLoad(AlluxioURI path) { @Override public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + Optional format, boolean verbose) { return mDelegatedFileSystem.getLoadProgress(path, format, verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index eb311ca57bc8..c6c0a05eb705 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -41,6 +41,7 @@ import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPOptions; import alluxio.grpc.LoadMetadataPType; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -67,6 +68,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import javax.security.auth.Subject; @@ -764,5 +766,5 @@ boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, * @return the load job progress */ String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose); + Optional format, 
boolean verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 9f1820428a68..5130ea713517 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -32,6 +32,7 @@ import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -71,6 +72,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; import java.util.function.Consumer; import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; @@ -393,7 +396,7 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, + public boolean submitLoad(AlluxioURI path, OptionalLong bandwidth, boolean usePartialListing, boolean verify) { throw new UnsupportedOperationException(); } @@ -405,7 +408,7 @@ public boolean stopLoad(AlluxioURI path) { @Override public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + Optional format, boolean verbose) { throw new UnsupportedOperationException(); } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index bebf0af52b81..876e12363282 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -41,6 +41,7 @@ 
import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -81,6 +82,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.function.BiConsumer; @@ -875,7 +877,7 @@ public boolean stopLoad(AlluxioURI path) { @Override public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + Optional format, boolean verbose) { throw new UnsupportedOperationException(); } diff --git a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java index 84cd171b1d31..b7bb97a4c34e 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java +++ b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java @@ -33,6 +33,7 @@ import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -309,7 +310,7 @@ public boolean stopLoad(AlluxioURI path) { @Override public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { + Optional format, boolean verbose) { throw new UnsupportedOperationException(); } From bfece1c5c6dbb760ee3e98797212bdab3bb4a8fe Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Wed, 18 Jan 2023 14:25:03 -0800 Subject: [PATCH 070/334] Add client-hadoop3 module the new client/hdfs3 and shaded/client-hadoop3 modules are currently a copy of the existing client/hdfs and shaded/client modules the addition of 
this will allow for changes that are available only in hadoop3, such as https://github.com/Alluxio/alluxio/pull/16017#discussion_r1039377694 both client jars will be built by default, but the symlink at client/alluxio-VERSION-client.jar will point to the hadoop-2 one to maintain backcompat. if the hadoop-3 profile is activated by adding `-Phadoop-3`, then the symlink will be overridden to point to the new hadoop3 shaded client jar pr-link: Alluxio/alluxio#16699 change-id: cid-a6ffd09414e8259078fd5a8f68c3e287d85feec5 --- core/client/hdfs3/pom.xml | 57 +++++ core/client/pom.xml | 1 + shaded/client-hadoop3/pom.xml | 377 ++++++++++++++++++++++++++++++++++ shaded/pom.xml | 1 + 4 files changed, 436 insertions(+) create mode 100644 core/client/hdfs3/pom.xml create mode 100644 shaded/client-hadoop3/pom.xml diff --git a/core/client/hdfs3/pom.xml b/core/client/hdfs3/pom.xml new file mode 100644 index 000000000000..3bc1b3b8cb14 --- /dev/null +++ b/core/client/hdfs3/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + org.alluxio + alluxio-core-client + 2.10.0-SNAPSHOT + + alluxio-core-client-hdfs3 + jar + Alluxio Core - Client - HDFS3 + HDFS Client of Alluxio Core For HDFS 3 + + + + + ${project.parent.parent.parent.basedir}/build + false + + + + + + org.alluxio + alluxio-core-client-hdfs + ${project.version} + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + diff --git a/core/client/pom.xml b/core/client/pom.xml index 338db707b66b..46895c9ea6cf 100644 --- a/core/client/pom.xml +++ b/core/client/pom.xml @@ -25,6 +25,7 @@ fs hdfs + hdfs3 diff --git a/shaded/client-hadoop3/pom.xml b/shaded/client-hadoop3/pom.xml new file mode 100644 index 000000000000..88aaa5390c7e --- /dev/null +++ b/shaded/client-hadoop3/pom.xml @@ -0,0 +1,377 @@ + + + 4.0.0 + + alluxio-shaded + org.alluxio + 2.10.0-SNAPSHOT + + alluxio-shaded-hadoop3-client + jar + Alluxio Shaded Libraries - Hadoop3 Client + Shaded Alluxio Client Module for hadoop 3 + + + + + 
${project.parent.parent.basedir}/build + false + + alluxio.shaded.client + + + + + + + org.apache.hadoop + hadoop-client + provided + + + org.rocksdb + rocksdbjni + runtime + + + + org.slf4j + slf4j-api + runtime + + + commons-logging + commons-logging + runtime + + + + org.apache.logging.log4j + log4j-slf4j-impl + runtime + true + + + org.apache.logging.log4j + log4j-api + runtime + true + + + org.apache.logging.log4j + log4j-core + runtime + true + + + + + + org.alluxio + alluxio-core-client-hdfs3 + ${project.version} + + + org.alluxio + alluxio-core-client-fs + ${project.version} + + + org.alluxio + alluxio-table-client + ${project.version} + + + + + + includeHadoopClient + + + org.apache.hadoop + hadoop-client + compile + + + com.fasterxml.jackson.core + jackson-core + + + + + + + + + hadoop-3 + + + + org.codehaus.mojo + exec-maven-plugin + + + symlink-jar + install + + exec + + + ln + + -fnsv + build/alluxio-${project.version}-hadoop3-client.jar + ${project.parent.parent.basedir}/client/alluxio-${project.version}-client.jar + + + + + + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + true + + + + empty-javadoc-jar + package + + jar + + + javadoc + ${basedir}/javadoc + + + + sources-jar + package + + jar + + + sources + + + + + + org.apache.maven.plugins + maven-source-plugin + + true + + + + attach-sources + + jar-no-fork + + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + uber-jar + package + + shade + + + true + true + ${create.dependency.reduced.pom} + + + + org.slf4j:* + + commons-logging:commons-logging + + log4j:log4j + org.apache.logging.log4j:log4j-api + org.apache.logging.log4j:log4j-core + org.apache.logging.log4j:log4j-slf4j-impl + org.alluxio:alluxio-microbench + org.openjdk.jmh:* + + + + + + org.apache.zookeeper:zookeeper-jute + + **/*.java + + + + *:* + + LICENSE + META-INF/LICENSE + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + *:* + + mozilla/public-suffix-list.txt + + + + + + + 
META-INF/native/libnetty_transport_native_epoll_x86_64.so + META-INF/native/liballuxio_shaded_client_netty_transport_native_epoll_x86_64.so + + true + + + META-INF/native/libnetty_transport_native_epoll_aarch_64.so + META-INF/native/liballuxio_shaded_client_netty_transport_native_epoll_aarch_64.so + + true + + + com/ + ${shading.prefix}.com. + + **/pom.xml + + com/ibm/security/* + com/ibm/security/**/* + com/sun/tools/* + com/sun/javadoc/* + com/sun/security/* + com/sun/jndi/* + com/sun/management/* + com/sun/tools/**/* + com/sun/javadoc/**/* + com/sun/security/**/* + com/sun/jndi/**/* + com/sun/management/**/* + + + + io/ + ${shading.prefix}.io. + + **/pom.xml + + + + javassist + ${shading.prefix}.javassist + + + javax/annotation/ + ${shading.prefix}.javax.annotation. + + **/pom.xml + + + + org/ + ${shading.prefix}.org. + + org/apache/hadoop/* + org/apache/hadoop/**/* + **/pom.xml + + org/slf4j/* + org/slf4j/**/* + org/apache/commons/logging/* + org/apache/commons/logging/**/* + org/apache/log4j/* + org/apache/log4j/**/* + + org/ietf/jgss/* + org/omg/**/* + org/w3c/dom/* + org/w3c/dom/**/* + org/xml/sax/* + org/xml/sax/**/* + + org/rocksdb/**/* + + + + + + + + + NOTICE.txt + NOTICE + LICENSE + Log4j-charsets.properties + Log4j-config.xsd + Log4j-events.dtd + Log4j-events.xsd + Log4j-levels.xsd + + + + META-INF/LICENSE + ${basedir}/../../LICENSE + + + META-INF/NOTICE + ${basedir}/../../NOTICE + + + + + + + + com.coderplus.maven.plugins + copy-rename-maven-plugin + + + copy-and-rename-file + install + + copy + + + ${basedir}/target/${project.artifactId}-${project.version}.jar + ${project.parent.parent.basedir}/client/build/alluxio-${project.version}-hadoop3-client.jar + + + + + + + + diff --git a/shaded/pom.xml b/shaded/pom.xml index b781a24457b7..12c95c5ae5d3 100644 --- a/shaded/pom.xml +++ b/shaded/pom.xml @@ -24,6 +24,7 @@ client + client-hadoop3 hadoop From 412e549f4c17a8561b7a35bdf981ae575a00f561 Mon Sep 17 00:00:00 2001 From: Deepak Shivamurthy 
<107634330+deepak-shivamurthy@users.noreply.github.com> Date: Thu, 19 Jan 2023 04:49:40 +0530 Subject: [PATCH 071/334] Fix logging to capture jobControlId ### What changes are proposed in this pull request? https://github.com/Alluxio/new-contributor-tasks/issues/643 Fix logging to capture jobControlId ### Why are the changes needed? this will help in getting jobControlId in the logs, and will help in getting status information about the job ### Does this PR introduce any user facing changes? its just log level changes pr-link: Alluxio/alluxio#16780 change-id: cid-551cf01210bb24d39239199adeb398201f67c184 --- .../main/java/alluxio/master/job/tracker/CmdJobTracker.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/job/server/src/main/java/alluxio/master/job/tracker/CmdJobTracker.java b/job/server/src/main/java/alluxio/master/job/tracker/CmdJobTracker.java index 0a555ca66034..2b7e6246d47f 100644 --- a/job/server/src/main/java/alluxio/master/job/tracker/CmdJobTracker.java +++ b/job/server/src/main/java/alluxio/master/job/tracker/CmdJobTracker.java @@ -114,7 +114,8 @@ private void runDistributedCommand(CmdConfig cmdConfig, long jobControlId) MigrateCliConfig migrateCliConfig = (MigrateCliConfig) cmdConfig; AlluxioURI srcPath = new AlluxioURI(migrateCliConfig.getSource()); AlluxioURI dstPath = new AlluxioURI(migrateCliConfig.getDestination()); - LOG.info("run a dist cp command, cmd config is " + cmdConfig); + LOG.info("run a dist cp command, job control id: {}, cmd config: {}", + jobControlId, cmdConfig); cmdInfo = mMigrateCliRunner.runDistCp(srcPath, dstPath, migrateCliConfig.getOverWrite(), migrateCliConfig.getBatchSize(), jobControlId); From 01cb80ec535d24379e9e057994638afdd39de378 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Wed, 18 Jan 2023 17:34:03 -0800 Subject: [PATCH 072/334] [DOCFIX] Fix monitor helm chart Grafana dashboard link The old Grafana dashboard link in the Readme returns 404. 
Replace it with the correct link. Solves https://github.com/Alluxio/alluxio/issues/16796 pr-link: Alluxio/alluxio#16795 change-id: cid-4fd171d90a4c447c1065af73c37f839d9b22bdd9 --- integration/kubernetes/helm-chart/monitor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/kubernetes/helm-chart/monitor/README.md b/integration/kubernetes/helm-chart/monitor/README.md index c973117a8399..2dcb9b9b6c1d 100644 --- a/integration/kubernetes/helm-chart/monitor/README.md +++ b/integration/kubernetes/helm-chart/monitor/README.md @@ -70,7 +70,7 @@ metrics: prometheus.io/path: "/metrics/prometheus/" ``` ### 4. Download the alluxio dashboard -Download the alluxio dashboard from [Alluxio grafana dashboard V1](https://grafana.com/grafana/dashboards/17763-alluxio-prometheus-grafana-monitor-v1/), then +Download the alluxio dashboard from [Alluxio grafana dashboard V1](https://grafana.com/grafana/dashboards/17785-alluxio-prometheus-grafana-monitor-v1/), then move the dashboard file to `monitor/source/grafana/dashboard` directory. ## Helm Chart Values From 37941a764531016ffc0ae74d900fa4ca90e862e1 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 19 Jan 2023 17:07:11 +0800 Subject: [PATCH 073/334] Refactor DefaultBlockWorker and test ### What changes are proposed in this pull request? Refactor block worker and its test ### Why are the changes needed? so that this class can be inherited more easily ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16789 change-id: cid-031ce1603aec4225691d31cb1d1dcb3e3cdf4450 --- .../worker/block/DefaultBlockWorker.java | 26 +- .../worker/block/DefaultBlockWorkerTest.java | 234 +--------------- .../block/DefaultBlockWorkerTestBase.java | 264 ++++++++++++++++++ 3 files changed, 282 insertions(+), 242 deletions(-) create mode 100644 core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 7d8f28ce50ab..0a3b2c931c1d 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -94,7 +94,7 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockWorker.class); /** Used to close resources during stop. */ - private final Closer mResourceCloser = Closer.create(); + protected final Closer mResourceCloser = Closer.create(); /** * Block master clients. commitBlock is the only reason to keep a pool of block master clients * on each worker. We should either improve our RPC model in the master or get rid of the @@ -103,14 +103,14 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { private final BlockMasterClientPool mBlockMasterClientPool; /** Client for all file system master communication. */ - private final FileSystemMasterClient mFileSystemMasterClient; + protected final FileSystemMasterClient mFileSystemMasterClient; /** Block store delta reporter for master heartbeat. */ private final BlockHeartbeatReporter mHeartbeatReporter; /** Session metadata, used to keep track of session heartbeats. */ private final Sessions mSessions; /** Block Store manager. 
*/ - private final BlockStore mBlockStore; + protected final BlockStore mBlockStore; /** List of paths to always keep in memory. */ private final PrefixList mWhitelist; @@ -118,12 +118,12 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { * The worker ID for this worker. This is initialized in {@link #start(WorkerNetAddress)} and may * be updated by the block sync thread if the master requests re-registration. */ - private final AtomicReference mWorkerId; + protected final AtomicReference mWorkerId; private final CacheRequestManager mCacheManager; private final FuseManager mFuseManager; - private WorkerNetAddress mAddress; + protected WorkerNetAddress mAddress; /** * Constructs a default block worker. @@ -207,12 +207,7 @@ public void start(WorkerNetAddress address) throws IOException { Preconditions.checkNotNull(mAddress, "mAddress"); // Setup BlockMasterSync - BlockMasterSync blockMasterSync = mResourceCloser - .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); - getExecutorService() - .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), - Configuration.global(), ServerUserState.global())); + setupBlockMasterSync(); // Setup PinListSyncer PinListSync pinListSync = mResourceCloser.register( @@ -242,6 +237,15 @@ public void start(WorkerNetAddress address) throws IOException { } } + protected void setupBlockMasterSync() throws IOException { + BlockMasterSync blockMasterSync = mResourceCloser + .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); + getExecutorService() + .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, + (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + Configuration.global(), ServerUserState.global())); + } + /** * Ask the master for a workerId. 
Should not be called outside of testing * diff --git a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java index 1d4edc018c8a..fb33d9c8be1f 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTest.java @@ -11,7 +11,6 @@ package alluxio.worker.block; -import static alluxio.util.CommonUtils.waitFor; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -19,23 +18,14 @@ import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import alluxio.AlluxioURI; -import alluxio.ConfigurationRule; import alluxio.Constants; -import alluxio.Sessions; import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; import alluxio.exception.runtime.AlluxioRuntimeException; import alluxio.exception.runtime.BlockDoesNotExistRuntimeException; import alluxio.exception.runtime.ResourceExhaustedRuntimeException; @@ -44,35 +34,20 @@ import alluxio.exception.status.UnavailableException; import alluxio.grpc.Block; import alluxio.grpc.BlockStatus; -import alluxio.grpc.CacheRequest; -import alluxio.grpc.Command; -import alluxio.grpc.CommandType; import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.UfsReadOptions; -import 
alluxio.master.NoopUfsManager; import alluxio.proto.dataserver.Protocol; -import alluxio.underfs.UfsManager; -import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.IdUtils; -import alluxio.util.WaitForOptions; import alluxio.util.io.BufferUtils; -import alluxio.util.network.NetworkAddressUtils; -import alluxio.wire.WorkerNetAddress; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.meta.TempBlockMeta; -import alluxio.worker.file.FileSystemMasterClient; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.TemporaryFolder; import java.io.BufferedOutputStream; -import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -81,118 +56,13 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicReference; /** * Unit tests for {@link DefaultBlockWorker}. 
*/ -public class DefaultBlockWorkerTest { - private static final int BLOCK_SIZE = 128; - - TieredBlockStore mTieredBlockStore; - // worker configurations - private static final long WORKER_ID = 30L; - // ufs for fallback read - private static final long UFS_MOUNT_ID = 1L; - // ufs for batch load - private static final long UFS_LOAD_MOUNT_ID = 2L; - private static final WorkerNetAddress WORKER_ADDRESS = - new WorkerNetAddress().setHost("localhost").setRpcPort(20001); - - // invalid initial worker id - private static final long INVALID_WORKER_ID = -1L; - - // test subject - private DefaultBlockWorker mBlockWorker; - - // mocked dependencies of DefaultBlockWorker - private BlockMasterClient mBlockMasterClient; - private FileSystemMasterClient mFileSystemMasterClient; - - private final Random mRandom = new Random(); - - @Rule - public TemporaryFolder mTestFolder = new TemporaryFolder(); - // worker's local storage directories - private String mMemDir; - private String mHddDir; - // ufs file for fallback read - private File mTestUfsFile; - - // ufs root path for batch load - private String mRootUfs; - // ufs file for batch load - private String mTestLoadFilePath; - - @Rule - public ConfigurationRule mConfigurationRule = - new ConfigurationRule(new ImmutableMap.Builder() - .put(PropertyKey.WORKER_TIERED_STORE_LEVELS, 2) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_ALIAS, Constants.MEDIUM_MEM) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_MEDIUMTYPE, Constants.MEDIUM_MEM) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_QUOTA, "1GB") - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_ALIAS, Constants.MEDIUM_HDD) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_MEDIUMTYPE, Constants.MEDIUM_HDD) - .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_QUOTA, "2GB") - .put(PropertyKey.WORKER_RPC_PORT, 0) - .put(PropertyKey.WORKER_MANAGEMENT_TIER_ALIGN_RESERVED_BYTES, "0") - .put(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, "10ms") - .build(), 
Configuration.modifiableGlobal()); - private BlockStore mBlockStore; - - /** - * Sets up all dependencies before a test runs. - */ - @Before - public void before() throws Exception { - // set up storage directories - mMemDir = mTestFolder.newFolder().getAbsolutePath(); - mHddDir = mTestFolder.newFolder().getAbsolutePath(); - mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_PATH, mMemDir); - mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_PATH, mHddDir); - - // set up BlockMasterClient - mBlockMasterClient = createMockBlockMasterClient(); - BlockMasterClientPool blockMasterClientPool = spy(new BlockMasterClientPool()); - doReturn(mBlockMasterClient).when(blockMasterClientPool).createNewResource(); - - mTieredBlockStore = spy(new TieredBlockStore()); - UfsManager ufsManager = new NoopUfsManager(); - AtomicReference workerId = new AtomicReference<>(INVALID_WORKER_ID); - mBlockStore = - spy(new MonoBlockStore(mTieredBlockStore, blockMasterClientPool, ufsManager, workerId)); - - mFileSystemMasterClient = createMockFileSystemMasterClient(); - - Sessions sessions = mock(Sessions.class); - - // set up a ufs directory for batch load jobs - mRootUfs = mTestFolder.newFolder("DefaultBlockWorkerTest").getAbsolutePath(); - mConfigurationRule.set(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, mRootUfs); - ufsManager.addMount(UFS_LOAD_MOUNT_ID, - new AlluxioURI(mRootUfs), - UnderFileSystemConfiguration.defaults(Configuration.global())); - // Write an actual file to UFS - mTestLoadFilePath = mTestFolder.newFile("temp").getAbsolutePath(); - byte[] buffer = BufferUtils.getIncreasingByteArray((int) (BLOCK_SIZE * 1.5)); - BufferUtils.writeBufferToFile(mTestLoadFilePath, buffer); - - // set up ufs directory for fallback reading - mTestUfsFile = mTestFolder.newFile(); - // mount test file to UFS_MOUNT_ID - ufsManager.addMount( - UFS_MOUNT_ID, - new AlluxioURI(mTestUfsFile.getAbsolutePath()), - UnderFileSystemConfiguration.defaults(Configuration.global()) - 
); - - mBlockWorker = new DefaultBlockWorker(blockMasterClientPool, mFileSystemMasterClient, - sessions, mBlockStore, workerId); - } - +public class DefaultBlockWorkerTest extends DefaultBlockWorkerTestBase { @Test public void getWorkerId() throws Exception { mBlockWorker.askForWorkerId(WORKER_ADDRESS); @@ -394,9 +264,9 @@ public void getStoreMeta() throws Exception { assertEquals(1, storeMeta.getBlockList().get("HDD").size()); Map> blockLocations = storeMeta.getBlockListByStorageLocation(); assertEquals(1, blockLocations.get( - new BlockStoreLocation("MEM", 0, "MEM")).size()); + new BlockStoreLocation("MEM", 0, "MEM")).size()); assertEquals(1, blockLocations.get( - new BlockStoreLocation("HDD", 0, "HDD")).size()); + new BlockStoreLocation("HDD", 0, "HDD")).size()); assertEquals(2, storeMeta.getNumberOfBlocks()); } @@ -637,102 +507,4 @@ public void cleanUpSession() throws Exception { // now another session should be able to grab write lock on the block mBlockWorker.removeBlock(anotherSessionId, blockId); } - - private void cacheBlock(boolean async) throws Exception { - // flush 1MB random data to ufs so that caching will take a while - long ufsBlockSize = 1024 * 1024; - byte[] data = new byte[(int) ufsBlockSize]; - mRandom.nextBytes(data); - - try (FileOutputStream fileOut = new FileOutputStream(mTestUfsFile); - BufferedOutputStream bufOut = new BufferedOutputStream(fileOut)) { - bufOut.write(data); - bufOut.flush(); - } - - // ufs options: delegate to the ufs mounted at UFS_MOUNT_ID - // with path to our test file - long blockId = mRandom.nextLong(); - Protocol.OpenUfsBlockOptions options = Protocol.OpenUfsBlockOptions - .newBuilder() - .setBlockSize(ufsBlockSize) - .setUfsPath(mTestUfsFile.getAbsolutePath()) - .setMountId(UFS_MOUNT_ID) - .setNoCache(false) - .setOffsetInFile(0) - .build(); - - // cache request: - // delegate to local ufs client rather than remote worker - CacheRequest request = CacheRequest - .newBuilder() - 
.setSourceHost(NetworkAddressUtils.getLocalHostName(500)) - .setBlockId(blockId) - .setLength(ufsBlockSize) - .setAsync(async) - .setOpenUfsBlockOptions(options) - .build(); - - mBlockWorker.cache(request); - - // check that the block metadata is present - if (async) { - assertFalse(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); - waitFor( - "Wait for async cache", - () -> mBlockWorker.getBlockStore().hasBlockMeta(blockId), - WaitForOptions.defaults().setInterval(10).setTimeoutMs(2000)); - } else { - assertTrue(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); - } - - long sessionId = mRandom.nextLong(); - // check that we can read the block locally - // note: this time we use an OpenUfsOption without ufsPath and blockInUfsTier so - // that the worker can't fall back to ufs read. - Protocol.OpenUfsBlockOptions noFallbackOptions = Protocol.OpenUfsBlockOptions.newBuilder() - .setBlockInUfsTier(false).build(); - try (BlockReader reader = mBlockWorker.createBlockReader( - sessionId, blockId, 0, false, noFallbackOptions)) { - ByteBuffer buf = reader.read(0, ufsBlockSize); - // alert: LocalFileBlockReader uses a MappedByteBuffer, which does not - // support the array operation. 
So we need to compare ByteBuffer manually - assertEquals(0, buf.compareTo(ByteBuffer.wrap(data))); - } - } - - // create a BlockMasterClient that simulates reasonable default - // interactions with the block master - private BlockMasterClient createMockBlockMasterClient() throws Exception { - BlockMasterClient client = mock(BlockMasterClient.class); - - // return designated worker id - doReturn(WORKER_ID) - .when(client) - .getId(any(WorkerNetAddress.class)); - - // return Command.Nothing for heartbeat - doReturn(Command.newBuilder().setCommandType(CommandType.Nothing).build()) - .when(client) - .heartbeat( - anyLong(), - anyMap(), - anyMap(), - anyList(), - anyMap(), - anyMap(), - anyList() - ); - return client; - } - - // create a mocked FileSystemMasterClient that simulates reasonable default - // interactions with file system master - private FileSystemMasterClient createMockFileSystemMasterClient() throws Exception { - FileSystemMasterClient client = mock(FileSystemMasterClient.class); - doReturn(ImmutableSet.of()) - .when(client) - .getPinList(); - return client; - } } diff --git a/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java new file mode 100644 index 000000000000..b7b3d53c5191 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/DefaultBlockWorkerTestBase.java @@ -0,0 +1,264 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import static alluxio.util.CommonUtils.waitFor; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; + +import alluxio.AlluxioURI; +import alluxio.ConfigurationRule; +import alluxio.Constants; +import alluxio.Sessions; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.CacheRequest; +import alluxio.grpc.Command; +import alluxio.grpc.CommandType; +import alluxio.master.NoopUfsManager; +import alluxio.proto.dataserver.Protocol; +import alluxio.underfs.UfsManager; +import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.util.WaitForOptions; +import alluxio.util.io.BufferUtils; +import alluxio.util.network.NetworkAddressUtils; +import alluxio.wire.WorkerNetAddress; +import alluxio.worker.block.io.BlockReader; +import alluxio.worker.file.FileSystemMasterClient; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.junit.Before; +import org.junit.Rule; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Test base for block worker related tests. 
+ */ +public class DefaultBlockWorkerTestBase { + protected static final int BLOCK_SIZE = 128; + + TieredBlockStore mTieredBlockStore; + // worker configurations + protected static final long WORKER_ID = 30L; + // ufs for fallback read + protected static final long UFS_MOUNT_ID = 1L; + // ufs for batch load + protected static final long UFS_LOAD_MOUNT_ID = 2L; + protected static final WorkerNetAddress WORKER_ADDRESS = + new WorkerNetAddress().setHost("localhost").setRpcPort(20001); + + // invalid initial worker id + protected static final long INVALID_WORKER_ID = -1L; + + // test subject + protected DefaultBlockWorker mBlockWorker; + + // mocked dependencies of DefaultBlockWorker + protected BlockMasterClient mBlockMasterClient; + protected FileSystemMasterClient mFileSystemMasterClient; + + protected final Random mRandom = new Random(); + + @Rule + public TemporaryFolder mTestFolder = new TemporaryFolder(); + // worker's local storage directories + protected String mMemDir; + protected String mHddDir; + // ufs file for fallback read + protected File mTestUfsFile; + + // ufs root path for batch load + protected String mRootUfs; + // ufs file for batch load + protected String mTestLoadFilePath; + protected BlockMasterClientPool mBlockMasterClientPool; + + @Rule + public ConfigurationRule mConfigurationRule = + new ConfigurationRule(new ImmutableMap.Builder() + .put(PropertyKey.WORKER_TIERED_STORE_LEVELS, 2) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_ALIAS, Constants.MEDIUM_MEM) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_MEDIUMTYPE, Constants.MEDIUM_MEM) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_QUOTA, "1GB") + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_ALIAS, Constants.MEDIUM_HDD) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_MEDIUMTYPE, Constants.MEDIUM_HDD) + .put(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_QUOTA, "2GB") + .put(PropertyKey.WORKER_RPC_PORT, 0) + .put(PropertyKey.WORKER_MANAGEMENT_TIER_ALIGN_RESERVED_BYTES, "0") + 
.put(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS, "10ms") + .build(), Configuration.modifiableGlobal()); + protected BlockStore mBlockStore; + + /** + * Sets up all dependencies before a test runs. + */ + @Before + public void before() throws Exception { + // set up storage directories + mMemDir = mTestFolder.newFolder().getAbsolutePath(); + mHddDir = mTestFolder.newFolder().getAbsolutePath(); + mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL0_DIRS_PATH, mMemDir); + mConfigurationRule.set(PropertyKey.WORKER_TIERED_STORE_LEVEL1_DIRS_PATH, mHddDir); + + // set up BlockMasterClient + mBlockMasterClient = createMockBlockMasterClient(); + mBlockMasterClientPool = spy(new BlockMasterClientPool()); + doReturn(mBlockMasterClient).when(mBlockMasterClientPool).createNewResource(); + + mTieredBlockStore = spy(new TieredBlockStore()); + UfsManager ufsManager = new NoopUfsManager(); + AtomicReference workerId = new AtomicReference<>(INVALID_WORKER_ID); + mBlockStore = + spy(new MonoBlockStore(mTieredBlockStore, mBlockMasterClientPool, ufsManager, workerId)); + + mFileSystemMasterClient = createMockFileSystemMasterClient(); + + Sessions sessions = mock(Sessions.class); + + // set up a ufs directory for batch load jobs + mRootUfs = mTestFolder.newFolder("DefaultBlockWorkerTest").getAbsolutePath(); + mConfigurationRule.set(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, mRootUfs); + ufsManager.addMount(UFS_LOAD_MOUNT_ID, + new AlluxioURI(mRootUfs), + UnderFileSystemConfiguration.defaults(Configuration.global())); + // Write an actual file to UFS + mTestLoadFilePath = mTestFolder.newFile("temp").getAbsolutePath(); + byte[] buffer = BufferUtils.getIncreasingByteArray((int) (BLOCK_SIZE * 1.5)); + BufferUtils.writeBufferToFile(mTestLoadFilePath, buffer); + + // set up ufs directory for fallback reading + mTestUfsFile = mTestFolder.newFile(); + // mount test file to UFS_MOUNT_ID + ufsManager.addMount( + UFS_MOUNT_ID, + new AlluxioURI(mTestUfsFile.getAbsolutePath()), + 
UnderFileSystemConfiguration.defaults(Configuration.global()) + ); + + mBlockWorker = new DefaultBlockWorker(mBlockMasterClientPool, mFileSystemMasterClient, + sessions, mBlockStore, workerId); + } + + protected void cacheBlock(boolean async) throws Exception { + // flush 1MB random data to ufs so that caching will take a while + long ufsBlockSize = 1024 * 1024; + byte[] data = new byte[(int) ufsBlockSize]; + mRandom.nextBytes(data); + + try (FileOutputStream fileOut = new FileOutputStream(mTestUfsFile); + BufferedOutputStream bufOut = new BufferedOutputStream(fileOut)) { + bufOut.write(data); + bufOut.flush(); + } + + // ufs options: delegate to the ufs mounted at UFS_MOUNT_ID + // with path to our test file + long blockId = mRandom.nextLong(); + Protocol.OpenUfsBlockOptions options = Protocol.OpenUfsBlockOptions + .newBuilder() + .setBlockSize(ufsBlockSize) + .setUfsPath(mTestUfsFile.getAbsolutePath()) + .setMountId(UFS_MOUNT_ID) + .setNoCache(false) + .setOffsetInFile(0) + .build(); + + // cache request: + // delegate to local ufs client rather than remote worker + CacheRequest request = CacheRequest + .newBuilder() + .setSourceHost(NetworkAddressUtils.getLocalHostName(500)) + .setBlockId(blockId) + .setLength(ufsBlockSize) + .setAsync(async) + .setOpenUfsBlockOptions(options) + .build(); + + mBlockWorker.cache(request); + + // check that the block metadata is present + if (async) { + assertFalse(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); + waitFor( + "Wait for async cache", + () -> mBlockWorker.getBlockStore().hasBlockMeta(blockId), + WaitForOptions.defaults().setInterval(10).setTimeoutMs(2000)); + } else { + assertTrue(mBlockWorker.getBlockStore().hasBlockMeta(blockId)); + } + + long sessionId = mRandom.nextLong(); + // check that we can read the block locally + // note: this time we use an OpenUfsOption without ufsPath and blockInUfsTier so + // that the worker can't fall back to ufs read. 
+ Protocol.OpenUfsBlockOptions noFallbackOptions = Protocol.OpenUfsBlockOptions.newBuilder() + .setBlockInUfsTier(false).build(); + try (BlockReader reader = mBlockWorker.createBlockReader( + sessionId, blockId, 0, false, noFallbackOptions)) { + ByteBuffer buf = reader.read(0, ufsBlockSize); + // alert: LocalFileBlockReader uses a MappedByteBuffer, which does not + // support the array operation. So we need to compare ByteBuffer manually + assertEquals(0, buf.compareTo(ByteBuffer.wrap(data))); + } + } + + // create a BlockMasterClient that simulates reasonable default + // interactions with the block master + protected BlockMasterClient createMockBlockMasterClient() throws Exception { + BlockMasterClient client = mock(BlockMasterClient.class); + + // return designated worker id + doReturn(WORKER_ID) + .when(client) + .getId(any(WorkerNetAddress.class)); + + // return Command.Nothing for heartbeat + doReturn(Command.newBuilder().setCommandType(CommandType.Nothing).build()) + .when(client) + .heartbeat( + anyLong(), + anyMap(), + anyMap(), + anyList(), + anyMap(), + anyMap(), + anyList() + ); + return client; + } + + // create a mocked FileSystemMasterClient that simulates reasonable default + // interactions with file system master + protected FileSystemMasterClient createMockFileSystemMasterClient() throws Exception { + FileSystemMasterClient client = mock(FileSystemMasterClient.class); + doReturn(ImmutableSet.of()) + .when(client) + .getPinList(); + return client; + } +} From 59f660d8d35578f635e0e80e539fb5435739afad Mon Sep 17 00:00:00 2001 From: Vimal Date: Fri, 20 Jan 2023 03:28:14 +0530 Subject: [PATCH 074/334] Fix cli distributed command hang issue ### What changes are proposed in this pull request? 
Whenever the job command status is asked for by the job command id — mostly by the alluxio cli client in synchronous mode — it should return the status immediately in case the plan info has one of the finished statuses (failed, completed or cancelled), instead of checking its children tasks' statuses and deriving the final status. ### Why are the changes needed? As per #16708, the distributed cli commands like distcp and distload hang; the same behaviour is observed consistently whenever any job worker is declared disconnected by the job master. Currently, the job master derives the status for a job command id by calling its children job control ids, which internally derive their status from their children's statuses. In normal circumstances, all the children of a job control id have the expected status, but in circumstances like a job worker being disconnected from the job master, the children can have odd status combinations, e.g. one has RUNNING while another has FAILED or COMPLETED, even though the job control id has a FAILED final status (plan info). This situation keeps the job command id permanently in the RUNNING state and, consequently, the alluxio cli client waits forever. The job control id (plan info) should return its status whenever it has one of the finished statuses (failed, completed or cancelled), instead of deriving it from its children. ### Does this PR introduce any user facing changes? 
NA pr-link: Alluxio/alluxio#16786 change-id: cid-e99b94fb06552c9b1e57278788c679cee1158bac --- .../alluxio/master/job/tracker/CmdRunAttempt.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/job/server/src/main/java/alluxio/master/job/tracker/CmdRunAttempt.java b/job/server/src/main/java/alluxio/master/job/tracker/CmdRunAttempt.java index aaae9b13b566..67e3cefae871 100644 --- a/job/server/src/main/java/alluxio/master/job/tracker/CmdRunAttempt.java +++ b/job/server/src/main/java/alluxio/master/job/tracker/CmdRunAttempt.java @@ -173,12 +173,14 @@ public Status checkJobStatus() { return Status.FAILED; } - // This make an assumption that this job tree only goes 1 level deep boolean finished = true; - for (JobInfo child : jobInfo.getChildren()) { - if (!child.getStatus().isFinished()) { - finished = false; - break; + if (!jobInfo.getStatus().isFinished()) { + // This make an assumption that this job tree only goes 1 level deep + for (JobInfo child : jobInfo.getChildren()) { + if (!child.getStatus().isFinished()) { + finished = false; + break; + } } } From 46ac8c9123860ffd7830b67bf1fd0610d39ef96f Mon Sep 17 00:00:00 2001 From: jja725 Date: Thu, 19 Jan 2023 13:58:54 -0800 Subject: [PATCH 075/334] [SMALLFIX]Update job service batch size default value ### What changes are proposed in this pull request? Set job service batch size default to 1 to avoid using batched jobs. Batched job is no longer helpful for job service since we move the scheduling logic from client to job master. Eventually, we would deprecate batched job. ### Why are the changes needed? Reduce code complexity ### Does this PR introduce any user facing changes? 
na pr-link: Alluxio/alluxio#16802 change-id: cid-32e57177652918bd7ec6d962e70439f6f2312fc5 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 98701f45c36e..634c59cdcfb8 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7003,7 +7003,7 @@ public String toString() { intBuilder(Name.JOB_REQUEST_BATCH_SIZE) .setDescription("The batch size client uses to make requests to the " + "job master.") - .setDefaultValue(20) + .setDefaultValue(1) .setScope(Scope.CLIENT) .build(); public static final PropertyKey JOB_WORKER_BIND_HOST = From 85f5260a886d81fdf62388a56bf9b3ba264e04a7 Mon Sep 17 00:00:00 2001 From: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> Date: Thu, 19 Jan 2023 14:02:28 -0800 Subject: [PATCH 076/334] Change default value for checkpoint compression level ### What changes are proposed in this pull request? Change the default value for a property key. ### Why are the changes needed? After some internal testing, we saw that a compression level of 1 compressed 98% as much as a compression level of -1 (system default) but reduced compression time by 23%. In light of this, we believe that 1 is the better default value. Screenshot 2023-01-19 at 12 37 55 PM Screenshot 2023-01-19 at 12 40 25 PM ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#16801 change-id: cid-0fb5fd90bcec7d2d9390056702bd58c0752237c1 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 634c59cdcfb8..c4674610c56b 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -2479,7 +2479,7 @@ public String toString() { .build(); public static final PropertyKey MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL = intBuilder(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL) - .setDefaultValue(-1) + .setDefaultValue(1) .setDescription("The zip compression level of checkpointing rocksdb, the zip" + " format defines ten levels of compression, ranging from 0" + " (no compression, but very fast) to 9 (best compression, but slow)." From 6849871c3927f348d468d8e7a318598acd1e31e9 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 20 Jan 2023 09:55:03 +0800 Subject: [PATCH 077/334] Refactor BaseFileSystemMaster ### What changes are proposed in this pull request? Refactor BaseFileSystemMaster and its test, a test base is extracted out ### Why are the changes needed? So that the class can be inherited easily ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including N/A pr-link: Alluxio/alluxio#16790 change-id: cid-b4b02d93ebbed6c3166131623d920d98d580f133 --- .../alluxio/client/file/BaseFileSystem.java | 38 ++++++-- .../client/file/BaseFileSystemTest.java | 67 +------------- .../client/file/FileSystemTestBase.java | 89 +++++++++++++++++++ 3 files changed, 119 insertions(+), 75 deletions(-) create mode 100644 core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index 773d05868560..e9c707b11ad6 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -100,6 +100,10 @@ public class BaseFileSystem implements FileSystem { protected volatile boolean mClosed = false; + protected static final Error UNREACHABLE_CODE_ERROR = new Error("We should never reach here. " + + "wrapAndThrowAlluxioStatusException is guaranteed " + + "to throw an exception and never returns."); + /** * Constructs a new base file system. * @@ -617,22 +621,38 @@ R rpc(RpcCallable fn) // Explicitly connect to trigger loading configuration from meta master. 
client.get().connect(); return fn.call(client.get()); - } catch (NotFoundException e) { + } catch (AlluxioStatusException e) { + wrapAndThrowAlluxioStatusException(e); + throw UNREACHABLE_CODE_ERROR; + } + } + + protected void wrapAndThrowAlluxioStatusException(AlluxioStatusException e) + throws AlluxioException, IOException { + if (e instanceof NotFoundException) { throw new FileDoesNotExistException(e.getMessage()); - } catch (AlreadyExistsException e) { + } + if (e instanceof AlreadyExistsException) { throw new FileAlreadyExistsException(e.getMessage()); - } catch (InvalidArgumentException e) { + } + if (e instanceof InvalidArgumentException) { throw new InvalidPathException(e.getMessage()); - } catch (FailedPreconditionException e) { + } + if (e instanceof FailedPreconditionException) { // A little sketchy, but this should be the only case that throws FailedPrecondition. throw new DirectoryNotEmptyException(e.getMessage()); - } catch (UnavailableException e) { - throw e; - } catch (UnauthenticatedException e) { + } + if (e instanceof UnavailableException || e instanceof UnauthenticatedException) { throw e; - } catch (AlluxioStatusException e) { - throw e.toAlluxioException(); } + throw e.toAlluxioException(); + } + + /** + * @return the file system context + */ + public FileSystemContext getFileSystemContext() { + return mFsContext; } /** diff --git a/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java b/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java index 44e3a51a3e66..fa8efe0a2f4e 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/BaseFileSystemTest.java @@ -24,10 +24,6 @@ import static org.mockito.Mockito.when; import alluxio.AlluxioURI; -import alluxio.ClientContext; -import alluxio.TestLoggerRule; -import alluxio.conf.Configuration; -import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; import 
alluxio.grpc.Bits; import alluxio.grpc.CreateDirectoryPOptions; @@ -41,16 +37,11 @@ import alluxio.grpc.RenamePOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; -import alluxio.resource.CloseableResource; import alluxio.util.FileSystemOptionsUtils; import alluxio.wire.FileInfo; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; @@ -62,63 +53,7 @@ */ @RunWith(PowerMockRunner.class) @PrepareForTest({FileSystemContext.class, FileSystemMasterClient.class}) -public final class BaseFileSystemTest { - - private static final RuntimeException EXCEPTION = new RuntimeException("test exception"); - private static final String SHOULD_HAVE_PROPAGATED_MESSAGE = - "Exception should have been propagated"; - - private InstancedConfiguration mConf = Configuration.copyGlobal(); - - @Rule - private TestLoggerRule mTestLogger = new TestLoggerRule(); - - private FileSystem mFileSystem; - private FileSystemContext mFileContext; - private ClientContext mClientContext; - private FileSystemMasterClient mFileSystemMasterClient; - - private class DummyAlluxioFileSystem extends BaseFileSystem { - public DummyAlluxioFileSystem(FileSystemContext fsContext) { - super(fsContext); - } - } - - /** - * Sets up the file system and the context before a test runs. - */ - @Before - public void before() { - mConf.set(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID, false); - mClientContext = ClientContext.create(mConf); - mFileContext = PowerMockito.mock(FileSystemContext.class); - mFileSystemMasterClient = PowerMockito.mock(FileSystemMasterClient.class); - when(mFileContext.acquireMasterClientResource()).thenReturn( - new CloseableResource(mFileSystemMasterClient) { - @Override - public void closeResource() { - // Noop. 
- } - }); - when(mFileContext.getClientContext()).thenReturn(mClientContext); - when(mFileContext.getClusterConf()).thenReturn(mConf); - when(mFileContext.getPathConf(any())).thenReturn(mConf); - when(mFileContext.getUriValidationEnabled()).thenReturn(true); - mFileSystem = new DummyAlluxioFileSystem(mFileContext); - } - - @After - public void after() { - mConf = Configuration.copyGlobal(); - } - - /** - * Verifies and releases the master client after a test with a filesystem operation. - */ - public void verifyFilesystemContextAcquiredAndReleased() { - verify(mFileContext).acquireMasterClientResource(); - } - +public final class BaseFileSystemTest extends FileSystemTestBase { /** * Tests the creation of a file via the * {@link BaseFileSystem#createFile(AlluxioURI, CreateFilePOptions)} method. diff --git a/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java b/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java new file mode 100644 index 000000000000..1c1c1ae3de41 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/file/FileSystemTestBase.java @@ -0,0 +1,89 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.file; + +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import alluxio.ClientContext; +import alluxio.TestLoggerRule; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.resource.CloseableResource; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.powermock.api.mockito.PowerMockito; + +/** + * Test base for {@link FileSystem} related test. + */ +public class FileSystemTestBase { + + protected static final RuntimeException EXCEPTION = new RuntimeException("test exception"); + protected static final String SHOULD_HAVE_PROPAGATED_MESSAGE = + "Exception should have been propagated"; + + protected InstancedConfiguration mConf = Configuration.copyGlobal(); + + @Rule + protected TestLoggerRule mTestLogger = new TestLoggerRule(); + + protected FileSystem mFileSystem; + protected FileSystemContext mFileContext; + protected ClientContext mClientContext; + protected FileSystemMasterClient mFileSystemMasterClient; + + private class DummyAlluxioFileSystem extends BaseFileSystem { + public DummyAlluxioFileSystem(FileSystemContext fsContext) { + super(fsContext); + } + } + + /** + * Sets up the file system and the context before a test runs. + */ + @Before + public void before() { + mConf.set(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID, false); + mClientContext = ClientContext.create(mConf); + mFileContext = PowerMockito.mock(FileSystemContext.class); + mFileSystemMasterClient = PowerMockito.mock(FileSystemMasterClient.class); + when(mFileContext.acquireMasterClientResource()).thenReturn( + new CloseableResource(mFileSystemMasterClient) { + @Override + public void closeResource() { + // Noop. 
+ } + }); + when(mFileContext.getClientContext()).thenReturn(mClientContext); + when(mFileContext.getClusterConf()).thenReturn(mConf); + when(mFileContext.getPathConf(any())).thenReturn(mConf); + when(mFileContext.getUriValidationEnabled()).thenReturn(true); + mFileSystem = new DummyAlluxioFileSystem(mFileContext); + } + + @After + public void after() { + mConf = Configuration.copyGlobal(); + } + + /** + * Verifies and releases the master client after a test with a filesystem operation. + */ + public void verifyFilesystemContextAcquiredAndReleased() { + verify(mFileContext).acquireMasterClientResource(); + } +} From 521187ecb813b4390d8f8cb66405597e025bff11 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 20 Jan 2023 10:12:56 +0800 Subject: [PATCH 078/334] Add some small refactoring changes ### What changes are proposed in this pull request? Functional: 1. Make capacity command load configurations before execution Non functional: 1. Add MasterSelectionPolicy.Type 2. Add NoopCloseable 3. Add LOG to some classes, update private to protected to some class member variables so that the class can be overrode 4. Update RaftJournalServiceHandler constructor to take an additional RaftJournalSystem as an input 5. Add primary selectory to master context 6. Extract updateAccessTime in FileSystemMaster 7. Let notifySyncedPath return SyncState ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16798 change-id: cid-4bb48934974261b708abee83bd45336b45d4598c --- .../main/java/alluxio/conf/PropertyKey.java | 2 +- .../MasterSelectionPolicy.java | 15 ++++ .../SelectionPolicyAnyMaster.java | 5 ++ .../SelectionPolicyAnyStandbyMaster.java | 5 ++ .../SelectionPolicyPrimaryMaster.java | 5 ++ .../SelectionPolicySpecifiedMaster.java | 5 ++ .../java/alluxio/resource/NoopCloseable.java | 29 +++++++ .../java/alluxio/wire/WorkerNetAddress.java | 2 + .../java/alluxio/master/AbstractMaster.java | 2 +- .../raft/RaftJournalServiceHandler.java | 14 +++- .../journal/raft/RaftJournalSystem.java | 2 +- .../main/java/alluxio/master/CoreMaster.java | 2 + .../alluxio/master/CoreMasterContext.java | 12 +++ .../master/block/DefaultBlockMaster.java | 4 + .../master/file/DefaultFileSystemMaster.java | 28 +++---- .../master/file/meta/InodeLockManager.java | 11 +++ .../file/meta/InodeTreePersistentState.java | 7 ++ .../master/file/meta/UfsSyncPathCache.java | 6 +- .../master/file/meta/InodeTreeTest.java | 6 ++ .../master/journal/NoopRaftJournalSystem.java | 77 +++++++++++++++++++ .../raft/SnapshotReplicationManagerTest.java | 2 +- .../worker/block/BlockHeartbeatReporter.java | 5 ++ .../worker/block/BlockMasterSyncHelper.java | 5 +- .../cli/fsadmin/report/CapacityCommand.java | 13 +++- .../fsadmin/report/CapacityCommandTest.java | 10 ++- 25 files changed, 245 insertions(+), 29 deletions(-) create mode 100644 core/common/src/main/java/alluxio/resource/NoopCloseable.java create mode 100644 core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index c4674610c56b..8c6c42852c18 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -2981,7 +2981,7 @@ public String toString() { + "UFS), EMBEDDED (use a journal embedded in the masters), 
and NOOP (do not use a " + "journal)") .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) - .setScope(Scope.MASTER) + .setScope(Scope.ALL) .build(); public static final PropertyKey MASTER_JOURNAL_LOG_SIZE_BYTES_MAX = dataSizeBuilder(Name.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX) diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java b/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java index cbfe753822e3..855914bcc291 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/MasterSelectionPolicy.java @@ -21,6 +21,16 @@ * determines which master node a client should connect to. */ public interface MasterSelectionPolicy { + /** + * The enum for master selection policies. + */ + enum Type { + PRIMARY_MASTER, + ANY_STANDBY_MASTER, + ANY_MASTER, + SPECIFIED_MASTER, + } + /** * Get and cache the primary master address. * @@ -48,6 +58,11 @@ InetSocketAddress getGrpcMasterAddress(MasterInquireClient masterInquireClient) */ void resetPrimaryMasterAddressCache(); + /** + * @return the type of the master selection policy + */ + Type getType(); + /** * Factory for {@link MasterSelectionPolicy}. 
*/ diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java index c9d0215d80d1..c76709613ccb 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyMaster.java @@ -36,4 +36,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( Collections.shuffle(masterAddresses); return masterAddresses.get(0); } + + @Override + public Type getType() { + return Type.ANY_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java index 1ef1a299ccc1..8e23504c5892 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyAnyStandbyMaster.java @@ -65,4 +65,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( } throw new UnavailableException("No standby masters available"); } + + @Override + public Type getType() { + return Type.ANY_STANDBY_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java index 7f8b07a4153d..8b31380a37cb 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicyPrimaryMaster.java @@ -28,4 +28,9 @@ public synchronized InetSocketAddress getGrpcMasterAddress( mPrimaryMasterAddress = masterInquireClient.getPrimaryRpcAddress(); return mPrimaryMasterAddress; } + + @Override + public Type getType() { + return Type.PRIMARY_MASTER; + } } diff 
--git a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java index e1c765a54159..ddefb397f749 100644 --- a/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java +++ b/core/common/src/main/java/alluxio/master/selectionpolicy/SelectionPolicySpecifiedMaster.java @@ -36,5 +36,10 @@ public synchronized InetSocketAddress getGrpcMasterAddress( MasterInquireClient masterInquireClient) throws UnavailableException { return mMasterAddressToConnect; } + + @Override + public Type getType() { + return Type.SPECIFIED_MASTER; + } } diff --git a/core/common/src/main/java/alluxio/resource/NoopCloseable.java b/core/common/src/main/java/alluxio/resource/NoopCloseable.java new file mode 100644 index 000000000000..4331588312e1 --- /dev/null +++ b/core/common/src/main/java/alluxio/resource/NoopCloseable.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.resource; + +import java.io.Closeable; +import java.io.IOException; + +/** + * A noop closeable that does nothing upon close. 
+ */ +public class NoopCloseable implements Closeable { + private NoopCloseable() { + } + + @Override + public void close() throws IOException { + } + + public static final NoopCloseable INSTANCE = new NoopCloseable(); +} diff --git a/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java b/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java index ed71d37e7652..7a2a9bedbc98 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java +++ b/core/common/src/main/java/alluxio/wire/WorkerNetAddress.java @@ -32,6 +32,8 @@ public final class WorkerNetAddress implements Serializable { private static final long serialVersionUID = 0L; + public static final WorkerNetAddress DUMMY = new WorkerNetAddress(); + private String mHost = ""; private String mContainerHost = ""; private int mRpcPort; diff --git a/core/server/common/src/main/java/alluxio/master/AbstractMaster.java b/core/server/common/src/main/java/alluxio/master/AbstractMaster.java index 0850f1166996..bc4a88862b40 100644 --- a/core/server/common/src/main/java/alluxio/master/AbstractMaster.java +++ b/core/server/common/src/main/java/alluxio/master/AbstractMaster.java @@ -47,7 +47,7 @@ public abstract class AbstractMaster implements Master { /** The executor used for running maintenance threads for the master. */ private ExecutorService mExecutorService; /** A handler to the journal for this master. */ - private final Journal mJournal; + protected final Journal mJournal; /** true if this master is in primary mode, and not standby mode. 
*/ private boolean mIsPrimary = false; diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java index 9bc203a87ed5..4291759023d3 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java @@ -18,19 +18,29 @@ import alluxio.grpc.UploadSnapshotPResponse; import io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * RPC handler for raft journal service. */ public class RaftJournalServiceHandler extends RaftJournalServiceGrpc.RaftJournalServiceImplBase { - + private static final Logger LOG = + LoggerFactory.getLogger(RaftJournalServiceHandler.class); private final SnapshotReplicationManager mManager; + private final RaftJournalSystem mRaftJournalSystem; /** * @param manager the snapshot replication manager + * @param raftJournalSystem the raft journal system */ - public RaftJournalServiceHandler(SnapshotReplicationManager manager) { + public RaftJournalServiceHandler( + SnapshotReplicationManager manager, + RaftJournalSystem raftJournalSystem) { mManager = manager; + mRaftJournalSystem = raftJournalSystem; + LOG.debug("RaftJournalServiceHandler initialized, journal system {}", + mRaftJournalSystem); } @Override diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java index 7bdffcc363af..a0ecfd8f2b3d 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java @@ -656,7 +656,7 @@ public synchronized void checkpoint(StateLockManager stateLockManager) throws IO public synchronized Map getJournalServices() 
{ Map services = new HashMap<>(); services.put(alluxio.grpc.ServiceType.RAFT_JOURNAL_SERVICE, new GrpcService( - new RaftJournalServiceHandler(mStateMachine.getSnapshotReplicationManager()))); + new RaftJournalServiceHandler(mStateMachine.getSnapshotReplicationManager(), this))); return services; } diff --git a/core/server/master/src/main/java/alluxio/master/CoreMaster.java b/core/server/master/src/main/java/alluxio/master/CoreMaster.java index 3ebafa223caa..1fa97dc82f93 100644 --- a/core/server/master/src/main/java/alluxio/master/CoreMaster.java +++ b/core/server/master/src/main/java/alluxio/master/CoreMaster.java @@ -24,6 +24,7 @@ public abstract class CoreMaster extends AbstractMaster { protected final SafeModeManager mSafeModeManager; protected final BackupManager mBackupManager; protected final JournalSystem mJournalSystem; + protected final PrimarySelector mPrimarySelector; protected final long mStartTimeMs; protected final int mPort; @@ -38,6 +39,7 @@ protected CoreMaster(CoreMasterContext context, Clock clock, mSafeModeManager = context.getSafeModeManager(); mBackupManager = context.getBackupManager(); mJournalSystem = context.getJournalSystem(); + mPrimarySelector = context.getPrimarySelector(); mStartTimeMs = context.getStartTimeMs(); mPort = context.getPort(); } diff --git a/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java b/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java index 69ea2a318314..220d1b2374e0 100644 --- a/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java +++ b/core/server/master/src/main/java/alluxio/master/CoreMasterContext.java @@ -19,6 +19,8 @@ import com.google.common.base.Preconditions; +import javax.annotation.Nullable; + /** * This class stores fields that are specific to core masters. 
*/ @@ -28,6 +30,8 @@ public class CoreMasterContext extends MasterContext { private final BlockMetaStore.Factory mBlockStoreFactory; private final InodeStore.Factory mInodeStoreFactory; private final JournalSystem mJournalSystem; + @Nullable + private final PrimarySelector mPrimarySelector; private final long mStartTimeMs; private final int mPort; @@ -44,6 +48,7 @@ private CoreMasterContext(Builder builder) { mJournalSystem = Preconditions.checkNotNull(builder.mJournalSystem, "journalSystem"); mStartTimeMs = builder.mStartTimeMs; mPort = builder.mPort; + mPrimarySelector = builder.mPrimarySelector; } /** @@ -93,6 +98,13 @@ public int getPort() { return mPort; } + /** + * @return the leader selector + */ + public @Nullable PrimarySelector getPrimarySelector() { + return mPrimarySelector; + } + /** * @return a new builder */ diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 013bd6b688ed..ab35187d28ac 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -362,6 +362,9 @@ public boolean processJournalEntry(JournalEntry entry) { long length = blockInfoEntry.getLength(); Optional block = mBlockMetaStore.getBlock(blockInfoEntry.getBlockId()); if (block.isPresent()) { + // If we write multiple replicas, multiple streams will all write BlockInfoEntry + // when they CommitBlock. We rely on the idempotence to handle duplicate entries + // and only warning when there are inconsistencies. 
long oldLen = block.get().getLength(); if (oldLen != Constants.UNKNOWN_SIZE) { LOG.warn("Attempting to update block length ({}) to a different length ({}).", oldLen, @@ -1074,6 +1077,7 @@ protected MasterWorkerInfo recordWorkerRegistration(long workerId) { @Override public long getWorkerId(WorkerNetAddress workerNetAddress) { + LOG.info("Worker {} requesting for an ID", workerNetAddress); MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (existingWorker != null) { // This worker address is already mapped to a worker id. diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 7fa2dfe61572..06f42ebd3c8a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -346,7 +346,7 @@ public class DefaultFileSystemMaster extends CoreMaster private final BlockMaster mBlockMaster; /** This manages the file system inode structure. This must be journaled. */ - private final InodeTree mInodeTree; + protected final InodeTree mInodeTree; /** Store for holding inodes. */ private final ReadOnlyInodeStore mInodeStore; @@ -396,7 +396,7 @@ public class DefaultFileSystemMaster extends CoreMaster private final ActiveSyncManager mSyncManager; /** Log writer for user access audit log. */ - private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + protected AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; /** Stores the time series for various metrics which are exposed in the UI. */ private final TimeSeriesStore mTimeSeriesStore; @@ -404,7 +404,7 @@ public class DefaultFileSystemMaster extends CoreMaster private final AccessTimeUpdater mAccessTimeUpdater; /** Used to check pending/running backup from RPCs. 
*/ - private final CallTracker mStateLockCallTracker; + protected final CallTracker mStateLockCallTracker; private final alluxio.master.file.loadmanager.LoadManager mLoadManager; final Clock mClock; @@ -959,8 +959,7 @@ public FileInfo getFileInfo(AlluxioURI path, GetStatusContext context) Mode.Bits accessMode = Mode.Bits.fromProto(context.getOptions().getAccessMode()); if (context.getOptions().getUpdateTimestamps() && context.getOptions().hasAccessMode() && (accessMode.imply(Mode.Bits.READ) || accessMode.imply(Mode.Bits.WRITE))) { - mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), - inodePath.getInode(), opTimeMs); + updateAccessTime(rpcContext, inodePath.getInode(), opTimeMs); } auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true); ret = fileInfo; @@ -1271,8 +1270,7 @@ private void listStatusInternal( // in the remaining recursive calls, so we set partialPath to the empty list partialPath = Collections.emptyList(); } - mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, - CommonUtils.getCurrentMs()); + updateAccessTime(rpcContext, inode, CommonUtils.getCurrentMs()); DescendantType nextDescendantType = (descendantType == DescendantType.ALL) ? 
DescendantType.ALL : DescendantType.NONE; try (CloseableIterator childrenIterator = getChildrenIterator( @@ -1343,7 +1341,7 @@ private void checkLoadMetadataOptions(LoadMetadataPType loadMetadataType, Alluxi } } - private boolean areDescendantsLoaded(InodeDirectoryView inode) { + protected boolean areDescendantsLoaded(InodeDirectoryView inode) { if (!inode.isDirectChildrenLoaded()) { return false; } @@ -1366,7 +1364,7 @@ private boolean areDescendantsLoaded(InodeDirectoryView inode) { * * @param inodePath the path to ensure */ - private void ensureFullPathAndUpdateCache(LockedInodePath inodePath) + protected void ensureFullPathAndUpdateCache(LockedInodePath inodePath) throws InvalidPathException, FileDoesNotExistException { boolean exists = false; try { @@ -3263,7 +3261,7 @@ public Set getLostFiles() { * @param path the path to load metadata for * @param context the {@link LoadMetadataContext} */ - private void loadMetadataIfNotExist(RpcContext rpcContext, AlluxioURI path, + protected void loadMetadataIfNotExist(RpcContext rpcContext, AlluxioURI path, LoadMetadataContext context) throws InvalidPathException, AccessControlException { DescendantType syncDescendantType = @@ -5199,7 +5197,7 @@ private Metrics() {} // prevent instantiation * @param srcInode the source inode of this command * @return newly-created {@link FileSystemMasterAuditContext} instance */ - private FileSystemMasterAuditContext createAuditContext(String command, AlluxioURI srcPath, + protected FileSystemMasterAuditContext createAuditContext(String command, AlluxioURI srcPath, @Nullable AlluxioURI dstPath, @Nullable Inode srcInode) { // Audit log may be enabled during runtime AsyncUserAccessAuditLogWriter auditLogWriter = null; @@ -5238,7 +5236,7 @@ private FileSystemMasterAuditContext createAuditContext(String command, AlluxioU return auditContext; } - private BlockDeletionContext createBlockDeletionContext() { + protected BlockDeletionContext createBlockDeletionContext() { return new 
DefaultBlockDeletionContext(this::removeBlocks, blocks -> blocks.forEach(mUfsBlockLocationCache::invalidate)); } @@ -5285,13 +5283,17 @@ private LockingScheme createLockingScheme(AlluxioURI path, FileSystemMasterCommo getSyncPathCache(), DescendantType.NONE); } - private LockingScheme createSyncLockingScheme(AlluxioURI path, + protected LockingScheme createSyncLockingScheme(AlluxioURI path, FileSystemMasterCommonPOptions options, DescendantType descendantType) throws InvalidPathException { return new LockingScheme(path, LockPattern.READ, options, getSyncPathCache(), descendantType); } + protected void updateAccessTime(RpcContext rpcContext, Inode inode, long opTimeMs) { + mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, opTimeMs); + } + boolean isAclEnabled() { return Configuration.getBoolean(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED); } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java index a87a4be13a76..2526c03f41aa 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeLockManager.java @@ -163,6 +163,17 @@ public RWLockResource lockInode(InodeView inode, LockMode mode, boolean useTryLo return mInodeLocks.get(inode.getId(), mode, useTryLock); } + /** + * Acquires an inode lock using {@link Lock#lock()}. + * + * @param inodeId the inode id of the inode to lock + * @param mode the mode to lock in + * @return a lock resource which must be closed to release the lock + */ + public RWLockResource lockInode(Long inodeId, LockMode mode) { + return mInodeLocks.get(inodeId, mode, false); + } + /** * Attempts to acquire an inode lock. 
* diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java index 9d87f913a05d..be7fe322933b 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java @@ -203,6 +203,13 @@ public Set getToBePersistedIds() { return Collections.unmodifiableSet(mToBePersistedIds); } + /** + * @return the list of TTL buckets for tracking inode TTLs + */ + public TtlBucketList getTtlBuckets() { + return mTtlBuckets; + } + //// /// The applyAndJournal() methods make sure the in-memory metadata state and the journal are /// BOTH updated. Any exception seen here will crash the master! So if an exception should be diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java index a76bb7f8dfe1..1c145c537d74 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java @@ -319,8 +319,9 @@ private void updateParentInvalidation(SyncState state, long time, long parentLev * @param syncTime the time to set the sync success to, if null then the current * clock time is used * @param isFile true if the synced path is a file + * @return the sync state */ - public void notifySyncedPath( + public SyncState notifySyncedPath( AlluxioURI path, DescendantType descendantType, long startTime, @Nullable Long syncTime, boolean isFile) { long time = syncTime == null ? 
startTime : @@ -329,9 +330,10 @@ public void notifySyncedPath( try (LockResource ignored = new LockResource(mRootLock)) { Preconditions.checkState(!isFile); updateSyncState(mRoot, time, startTime, false, descendantType); + return mRoot; } } else { - mItems.asMap().compute(path.getPath(), (key, state) -> { + return mItems.asMap().compute(path.getPath(), (key, state) -> { if (state == null) { state = new SyncState(isFile); } diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java index 0c87807e2ff9..1ca5eb9ab5ab 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/InodeTreeTest.java @@ -82,6 +82,8 @@ import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -113,6 +115,8 @@ public final class InodeTreeTest { private MasterRegistry mRegistry; private MetricsMaster mMetricsMaster; + private ExecutorService mThreadPool; + @Parameters public static Iterable> parameters() throws Exception { String dir = @@ -163,12 +167,14 @@ public void before() throws Exception { mRegistry.start(true); mTree.initializeRoot(TEST_OWNER, TEST_GROUP, TEST_DIR_MODE, NoopJournalContext.INSTANCE); + mThreadPool = Executors.newCachedThreadPool(); } @After public void after() throws Exception { mRegistry.stop(); mInodeStore.close(); + mThreadPool.shutdown(); } /** diff --git a/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java b/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java new file mode 100644 index 000000000000..4daaedd72b66 --- /dev/null +++ 
b/core/server/master/src/test/java/alluxio/master/journal/NoopRaftJournalSystem.java @@ -0,0 +1,77 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal; + +import alluxio.master.Master; +import alluxio.master.journal.noop.NoopJournal; +import alluxio.master.journal.raft.RaftJournalSystem; +import alluxio.util.network.NetworkAddressUtils; + +import java.net.URI; +import java.net.URISyntaxException; + +/** + * A noop raft journal system for testing. + */ +public class NoopRaftJournalSystem extends RaftJournalSystem { + private boolean mIsLeader = false; + + /** + * Creates a raft journal system object. + * @throws URISyntaxException + */ + public NoopRaftJournalSystem() throws URISyntaxException { + super(new URI(""), NetworkAddressUtils.ServiceType.MASTER_RAFT); + } + + /** + * Sets the raft journal state. 
+ * @param isLeader if the raft journal system should be a leader + */ + public synchronized void setIsLeader(boolean isLeader) { + mIsLeader = isLeader; + } + + @Override + public synchronized void start() { + } + + @Override + public synchronized void stop() { + } + + @Override + public synchronized boolean isLeader() { + return mIsLeader; + } + + @Override + public synchronized void startInternal() { + } + + @Override + public synchronized void stopInternal() { + } + + @Override + public synchronized void gainPrimacy() { + } + + @Override + public synchronized void losePrimacy() { + } + + @Override + public synchronized Journal createJournal(Master master) { + return new NoopJournal(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java index 02a90fec31cc..639b5b113a54 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java @@ -100,7 +100,7 @@ private void before(int numFollowers) throws Exception { String serverName = InProcessServerBuilder.generateName(); mServer = InProcessServerBuilder.forName(serverName) .directExecutor() - .addService(new RaftJournalServiceHandler(mLeaderSnapshotManager)).build(); + .addService(new RaftJournalServiceHandler(mLeaderSnapshotManager, null)).build(); mServer.start(); ManagedChannel channel = InProcessChannelBuilder.forName(serverName).directExecutor().build(); RaftJournalServiceGrpc.RaftJournalServiceStub stub = RaftJournalServiceGrpc.newStub(channel); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java index 6a6f7837556a..f26b7b74bbdb 100644 --- 
a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java @@ -12,6 +12,8 @@ package alluxio.worker.block; import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; @@ -28,6 +30,8 @@ */ @ThreadSafe public final class BlockHeartbeatReporter extends AbstractBlockStoreEventListener { + private static final Logger LOG = LoggerFactory.getLogger(BlockHeartbeatReporter.class); + /** Lock for operations on the removed and added block collections. */ private final Object mLock; @@ -54,6 +58,7 @@ public BlockHeartbeatReporter() { mRemovedBlocks = new ArrayList<>(100); mAddedBlocks = new HashMap<>(20); mLostStorage = new HashMap<>(); + LOG.debug("BlockHeartbeatReporter initialized"); } /** diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java index 0359b21a81b9..b43cc378ace1 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java @@ -145,9 +145,10 @@ boolean heartbeat( } catch (Exception e) { // An error occurred, log and ignore it or error if heartbeat timeout is reached if (cmdFromMaster == null) { - LOG.error("Failed to receive master heartbeat command.", e); + LOG.error("Failed to receive master heartbeat command. worker id {}", workerId, e); } else { - LOG.error("Failed to receive or execute master heartbeat command: {}", cmdFromMaster, e); + LOG.error("Failed to receive or execute master heartbeat command: {}. 
worker id {}", + cmdFromMaster, workerId, e); } mMasterClient.disconnect(); return false; diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java index 1332c3a8bee0..3caac3c2a36e 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java @@ -17,7 +17,10 @@ import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.client.block.options.GetWorkerReportOptions.WorkerInfoField; import alluxio.client.block.options.GetWorkerReportOptions.WorkerRange; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; import alluxio.exception.status.InvalidArgumentException; +import alluxio.grpc.Scope; import alluxio.util.FormatUtils; import alluxio.wire.WorkerInfo; @@ -78,7 +81,8 @@ public int run(CommandLine cl) throws IOException { } GetWorkerReportOptions options = getOptions(cl); - generateCapacityReport(options); + Configuration.loadClusterDefaults(mBlockMasterClient.getConfAddress(), Scope.CLIENT); + generateCapacityReport(options, Configuration.global()); return 0; } @@ -86,8 +90,10 @@ public int run(CommandLine cl) throws IOException { * Generates capacity report. 
* * @param options GetWorkerReportOptions to get worker report + * @param conf the cluster configuration */ - public void generateCapacityReport(GetWorkerReportOptions options) throws IOException { + public void generateCapacityReport(GetWorkerReportOptions options, AlluxioConfiguration conf) + throws IOException { List workerInfoList = mBlockMasterClient.getWorkerReport(options); if (workerInfoList.size() == 0) { print("No workers found."); @@ -288,7 +294,8 @@ private GetWorkerReportOptions getOptions(CommandLine cl) throws IOException { Set fieldRange = EnumSet.of(WorkerInfoField.ADDRESS, WorkerInfoField.WORKER_CAPACITY_BYTES, WorkerInfoField.WORKER_CAPACITY_BYTES_ON_TIERS, WorkerInfoField.LAST_CONTACT_SEC, WorkerInfoField.WORKER_USED_BYTES, - WorkerInfoField.WORKER_USED_BYTES_ON_TIERS, WorkerInfoField.BUILD_VERSION); + WorkerInfoField.WORKER_USED_BYTES_ON_TIERS, WorkerInfoField.BUILD_VERSION, + WorkerInfoField.ID, WorkerInfoField.STATE); workerOptions.setFieldRange(fieldRange); if (cl.hasOption(ReportCommand.LIVE_OPTION_NAME)) { diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java index 6c729b519815..4a8d5521194e 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java @@ -14,6 +14,7 @@ import alluxio.Constants; import alluxio.client.block.BlockMasterClient; import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.conf.Configuration; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -52,7 +53,8 @@ public void longCapacity() throws IOException { PrintStream printStream = new PrintStream(outputStream, true, "utf-8")) { CapacityCommand capacityCommand = new CapacityCommand(mBlockMasterClient, printStream); - capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults()); + 
capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults(), + Configuration.global()); String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable List expectedOutput = Arrays.asList("Capacity information for all workers: ", @@ -93,7 +95,8 @@ public void shortCapacity() throws IOException { PrintStream printStream = new PrintStream(outputStream, true, "utf-8")) { CapacityCommand capacityCommand = new CapacityCommand(mBlockMasterClient, printStream); - capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults()); + capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults(), + Configuration.global()); String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable List expectedOutput = Arrays.asList("Capacity information for all workers: ", @@ -125,7 +128,8 @@ public void longWorkerNameCapacity() throws IOException { PrintStream printStream = new PrintStream(outputStream, true, "utf-8")) { CapacityCommand capacityCommand = new CapacityCommand(mBlockMasterClient, printStream); - capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults()); + capacityCommand.generateCapacityReport(GetWorkerReportOptions.defaults(), + Configuration.global()); String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); List testRst = Arrays.asList(output.split("\n")); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable From ccb3784effac73d2cddf05097933b50b08025fb1 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 20 Jan 2023 10:41:23 +0800 Subject: [PATCH 079/334] Refactor dynamic resource pool selection type ### What changes are proposed in this pull request? make fifo/lifo an enum type so that we can support more strategies ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16792 change-id: cid-efaa6dfa0a0abe7cfab8be46129db54ad53bc305 --- .../alluxio/resource/DynamicResourcePool.java | 46 ++++++++++++------- .../resource/DynamicResourcePoolTest.java | 3 +- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java b/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java index 7b86f66027cf..abb785e55f74 100644 --- a/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java +++ b/core/common/src/main/java/alluxio/resource/DynamicResourcePool.java @@ -46,6 +46,16 @@ */ @ThreadSafe public abstract class DynamicResourcePool implements Pool { + /** + * A policy specifying in what order to pick a resource item from a pool. + */ + public enum SelectionPolicy { + // first-in-first-out, use the hottest resource + FIFO, + // last-in-first-out, use the coldest resource + LIFO, + } + private static final Logger LOG = LoggerFactory.getLogger(DynamicResourcePool.class); /** @@ -110,7 +120,7 @@ public static final class Options { * If set to false, the first returned resource will take priority. * {@link #acquire()} tends to reuse the most fresh resource if possible. 
*/ - private boolean mFIFO = false; + private SelectionPolicy mSelectionPolicy = SelectionPolicy.LIFO; /** * @return the max capacity @@ -148,18 +158,18 @@ public ScheduledExecutorService getGcExecutor() { } /** - * @return if resources are returned in a FIFO manner + * @return the selection policy */ - public boolean getFIFO() { - return mFIFO; + public SelectionPolicy getSelectionPolicy() { + return mSelectionPolicy; } /** - * @param fifo if resources should be returned in a FIFO manner + * @param policy how to select a client from the pool * @return the updated object */ - public Options setFIFO(boolean fifo) { - mFIFO = fifo; + public Options setSelectionPolicy(SelectionPolicy policy) { + mSelectionPolicy = policy; return this; } @@ -233,12 +243,9 @@ public static Options defaultOptions() { private final int mMinCapacity; /** - * If set to true, when a resource needs to be taken from the pool, the last returned resource - * will take priority. {@link #acquire()} tends to return a different object every time. - * If set to false, the first returned resource will take priority. - * {@link #acquire()} tends to reuse the most fresh resource if possible. + * the selection policy of the resource pool. see {@link SelectionPolicy} for details */ - private final boolean mFIFO; + protected final SelectionPolicy mSelectionPolicy; // Tracks the resources that are available ordered by lastAccessTime (the head is // the most recently used resource). @@ -251,7 +258,7 @@ public static Options defaultOptions() { // put/delete operations are guarded by "mLock" so that we can control its size to be within // a [min, max] range. mLock is reused for simplicity. A separate lock can be used if we see // any performance overhead. 
- private final ConcurrentHashMap> mResources = + protected final ConcurrentHashMap> mResources = new ConcurrentHashMap<>(32); private final Counter mCounter; @@ -272,7 +279,7 @@ public DynamicResourcePool(Options options) { "cannot find resource count metric for %s", getClass().getName()); mMaxCapacity = options.getMaxCapacity(); mMinCapacity = options.getMinCapacity(); - mFIFO = options.getFIFO(); + mSelectionPolicy = options.getSelectionPolicy(); mAvailableResources = new ArrayDeque<>(Math.min(mMaxCapacity, 32)); mGcFuture = mExecutor.scheduleAtFixedRate(() -> { List resourcesToGc = new ArrayList<>(); @@ -494,10 +501,15 @@ private void remove(T resource) { private ResourceInternal poll() { try { mLock.lock(); - if (mFIFO) { - return mAvailableResources.pollLast(); + switch (mSelectionPolicy) { + case FIFO: + return mAvailableResources.pollLast(); + case LIFO: + return mAvailableResources.pollFirst(); + default: + throw new UnsupportedOperationException( + "Policy " + mSelectionPolicy + " is not supported!"); } - return mAvailableResources.pollFirst(); } finally { mLock.unlock(); } diff --git a/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java b/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java index ff1677aabd3c..3b06c2f217b9 100644 --- a/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java +++ b/core/common/src/test/java/alluxio/resource/DynamicResourcePoolTest.java @@ -166,7 +166,8 @@ public void acquireWithCapacity() throws Exception { */ @Test public void acquireFIFO() throws Exception { - TestPool pool = new TestPool(DynamicResourcePool.Options.defaultOptions().setFIFO(true)); + TestPool pool = new TestPool(DynamicResourcePool.Options.defaultOptions().setSelectionPolicy( + DynamicResourcePool.SelectionPolicy.FIFO)); List resourceList = new ArrayList<>(); for (int i = 0; i < 3; i++) { Resource resource = pool.acquire(); From 9f91f95e81d7aa1d1ea4b2b2e3ede92a645fac55 Mon Sep 17 00:00:00 2001 From: 
Tyler Crain Date: Thu, 19 Jan 2023 20:45:47 -0800 Subject: [PATCH 080/334] Use async journals for block id journal entries in metadata sync ### What changes are proposed in this pull request? Currently when metadata is created during metadata sync, when creating journal entries for block ids, they will be journaled synchronously. All other metadata updates are journaled asynchronously then flushed as a group. This PR makes the block id journal entries also be flushed asynchronously using the same journal context. Additionally it adds higher concurrency defaults on some of the caches used by metadata sync which reduces the locking overhead. ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#16529 change-id: cid-470942e367abc32a9557f07241ef7b99439a5d82 --- .../java/alluxio/master/block/BlockMaster.java | 16 +++++++++++++++- .../alluxio/master/block/DefaultBlockMaster.java | 5 ++--- .../master/file/DefaultFileSystemMaster.java | 16 +++++++++++----- .../file/meta/AsyncUfsAbsentPathCache.java | 3 ++- .../master/file/meta/UfsSyncPathCache.java | 3 ++- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index 4d8512df91cf..daf9d1906f67 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -26,6 +26,7 @@ import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.Master; import alluxio.master.block.meta.MasterWorkerInfo; +import alluxio.master.journal.JournalContext; import alluxio.metrics.Metric; import alluxio.proto.meta.Block; import alluxio.wire.Address; @@ -169,7 +170,20 @@ void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, * @param blockId the id of the block to commit * @param length the length of the block */ - void commitBlockInUFS(long blockId, long 
length) throws UnavailableException; + default void commitBlockInUFS(long blockId, long length) throws UnavailableException { + try (JournalContext journalContext = createJournalContext()) { + commitBlockInUFS(blockId, length, journalContext); + } + } + + /** + * Marks a block as committed, but without a worker location. This means the block is only in ufs. + * Append any created journal entries to the included context. + * @param blockId the id of the block to commit + * @param length the length of the block + * @param context the journal context + */ + void commitBlockInUFS(long blockId, long length, JournalContext context); /** * @param blockId the block id to get information for diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index ab35187d28ac..d2cb9c715334 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -947,10 +947,9 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, } @Override - public void commitBlockInUFS(long blockId, long length) throws UnavailableException { + public void commitBlockInUFS(long blockId, long length, JournalContext journalContext) { LOG.debug("Commit block in ufs. blockId: {}, length: {}", blockId, length); - try (JournalContext journalContext = createJournalContext(); - LockResource r = lockBlock(blockId)) { + try (LockResource r = lockBlock(blockId)) { if (mBlockMetaStore.getBlock(blockId).isPresent()) { // Block metadata already exists, so do not need to create a new one. 
return; diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 06f42ebd3c8a..112783b82322 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1020,7 +1020,7 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) mBlockMaster.removeBlocks(fileInfo.getBlockIds(), true); // Commit all the file blocks (without locations) so the metadata for the block exists. commitBlockInfosForFile( - fileInfo.getBlockIds(), fileInfo.getLength(), fileInfo.getBlockSizeBytes()); + fileInfo.getBlockIds(), fileInfo.getLength(), fileInfo.getBlockSizeBytes(), null); // Reset file-block-info list with the new list. try { fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath)); @@ -1761,7 +1761,8 @@ private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePa if (inode.isPersisted()) { // Commit all the file blocks (without locations) so the metadata for the block exists. 
- commitBlockInfosForFile(entry.getSetBlocksList(), length, inode.getBlockSizeBytes()); + commitBlockInfosForFile(entry.getSetBlocksList(), length, inode.getBlockSizeBytes(), + rpcContext.getJournalContext()); // The path exists in UFS, so it is no longer absent mUfsAbsentPathCache.processExisting(inodePath.getUri()); } @@ -1807,13 +1808,18 @@ private void cacheOperation(OperationContext opContext) { * @param blockIds the list of block ids * @param fileLength length of the file in bytes * @param blockSize the block size in bytes + * @param context the journal context, if null a new context will be created */ - private void commitBlockInfosForFile(List blockIds, long fileLength, long blockSize) - throws UnavailableException { + private void commitBlockInfosForFile(List blockIds, long fileLength, long blockSize, + @Nullable JournalContext context) throws UnavailableException { long currLength = fileLength; for (long blockId : blockIds) { long currentBlockSize = Math.min(currLength, blockSize); - mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); + if (context != null) { + mBlockMaster.commitBlockInUFS(blockId, currentBlockSize, context); + } else { + mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); + } currLength -= currentBlockSize; } } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java b/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java index c69ebd10ac83..b4aa4af63c62 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/AsyncUfsAbsentPathCache.java @@ -82,7 +82,8 @@ public AsyncUfsAbsentPathCache(MountTable mountTable, int numThreads, Clock cloc mMountTable = mountTable; mClock = clock; mCurrentPaths = new ConcurrentHashMap<>(8, 0.95f, 8); - mCache = CacheBuilder.newBuilder().maximumSize(MAX_PATHS).recordStats().build(); + mCache = 
CacheBuilder.newBuilder().maximumSize(MAX_PATHS).concurrencyLevel(Configuration.getInt( + PropertyKey.MASTER_UFS_PATH_CACHE_THREADS)).recordStats().build(); /* Number of threads for the async pool. */ mPool = new ThreadPoolExecutor(numThreads, numThreads, THREAD_KEEP_ALIVE_SECONDS, diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java index 1c145c537d74..f236abe43693 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncPathCache.java @@ -90,7 +90,8 @@ public UfsSyncPathCache(Clock clock) { @VisibleForTesting UfsSyncPathCache(Clock clock, @Nullable BiConsumer onRemoval) { mClock = Preconditions.checkNotNull(clock); - mItems = CacheBuilder.newBuilder() + mItems = CacheBuilder.newBuilder().concurrencyLevel( + Configuration.getInt(PropertyKey.MASTER_UFS_PATH_CACHE_THREADS)) .removalListener( (removal) -> { if (removal.wasEvicted() && removal.getKey() != null && removal.getValue() != null) { From 0c86475bdd7c4d975bb4c68610f497212b1aae86 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Fri, 20 Jan 2023 23:19:42 +0800 Subject: [PATCH 081/334] [SMALLFIX] Remove unregistered worker metadata on timeout Clean up `mTempWorkers` when a worker is deemed lost pr-link: Alluxio/alluxio#16692 change-id: cid-598c1d74ce4f883912a2ab8cf0efd696174ff3fa --- .../main/java/alluxio/master/block/DefaultBlockMaster.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index d2cb9c715334..2897a1875aaa 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -1569,7 
+1569,7 @@ public void heartbeat() { } for (MasterWorkerInfo worker : mLostWorkers) { try (LockResource r = worker.lockWorkerMeta( - EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { LOG.error("The worker {}({}) timed out after {}ms without a heartbeat! " @@ -1612,6 +1612,8 @@ public void forgetAllWorkers() { private void processLostWorker(MasterWorkerInfo worker) { mLostWorkers.add(worker); mWorkers.remove(worker); + // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever + mTempWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer
function : mWorkerLostListeners) { function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort())); @@ -1625,6 +1627,7 @@ private void processLostWorker(MasterWorkerInfo worker) { private void deleteWorkerMetadata(MasterWorkerInfo worker) { mWorkers.remove(worker); mLostWorkers.remove(worker); + // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever mTempWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer
function : mWorkerDeleteListeners) { From 5a7cfe7f68fb393436dda1f4a32143074ae5aec3 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Tue, 24 Jan 2023 13:41:14 -0800 Subject: [PATCH 082/334] Add disable are descendants loaded property ### What changes are proposed in this pull request? This adds a property to the list status call to disable the are descendants loaded check. ### Why are the changes needed? Currently before a recursive listing happens, the entire enclosed set of files are traversed to see if they all have the descendants loaded flag set in order to check if a metadata sync is needed. This can be costly especially if doing batch or partial listings. By setting this property the check can be avoided. This is, for example, recommended to set to true on the calls following the first call of a batch listing. Note that this is recommended as more of a temporary fix for internal libraries, until a new logic design for the metadata sync is completed. ### Does this PR introduce any user facing changes? New property in the list status options proto. 
pr-link: Alluxio/alluxio#16813 change-id: cid-bd1fdbc4be0cadd9dda1a4115043db4b6a89c6a5 --- .../java/alluxio/master/file/DefaultFileSystemMaster.java | 2 +- .../main/java/alluxio/master/file/loadmanager/LoadJob.java | 3 +++ core/transport/src/main/proto/grpc/file_system_master.proto | 6 ++++++ core/transport/src/main/proto/proto.lock | 5 +++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 112783b82322..19e1656ce20e 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1124,7 +1124,7 @@ public void listStatus(AlluxioURI path, ListStatusContext context, boolean isLoaded = true; if (inodePath.fullPathExists()) { inode = inodePath.getInode(); - if (inode.isDirectory() + if (inode.isDirectory() && !context.getOptions().getDisableAreDescendantsLoadedCheck() && context.getOptions().getLoadMetadataType() != LoadMetadataPType.ALWAYS) { InodeDirectory inodeDirectory = inode.asDirectory(); isLoaded = inodeDirectory.isDirectChildrenLoaded(); diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java index 94d94e573fb9..098c7a23d55b 100644 --- a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java +++ b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java @@ -564,6 +564,9 @@ public FileInfo next() } private void partialListFileInfos() { + if (!mStartAfter.isEmpty()) { + mListOptions.setDisableAreDescendantsLoadedCheck(true); + } ListStatusContext context = ListStatusContext.create(ListStatusPartialPOptions.newBuilder() .setOptions(mListOptions) .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) diff --git 
a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index bb01a79e09f9..2f3f267c6020 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -229,6 +229,12 @@ message ListStatusPOptions { optional bool recursive = 4; // No data will be transferred. optional bool loadMetadataOnly = 5; + // Setting this to true will disable checking during metadata sync to see if the children + // of a directory has been loaded. This will avoid a costly full traversal of the file + // system during recursive listings, but may result in the children of directories not + // being loaded. It is recommended to set this to true after the first call of a + // recursive partial listing. + optional bool disableAreDescendantsLoadedCheck = 6; } message ListStatusPRequest { /** the path of the file or directory */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index d1ca68994440..f3d8d08b43a2 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2691,6 +2691,11 @@ "id": 5, "name": "loadMetadataOnly", "type": "bool" + }, + { + "id": 6, + "name": "disableAreDescendantsLoadedCheck", + "type": "bool" } ] }, From 945b10dc30c4336540c1c1d2c60610780fcf6564 Mon Sep 17 00:00:00 2001 From: tian bao <2011xuesong@gmail.com> Date: Wed, 25 Jan 2023 08:26:48 +0800 Subject: [PATCH 083/334] Fix metric worker.cacheBlockSize increases when cache already existed ### What changes are proposed in this pull request? If cache already exists in Alluxio, when try to cache the same block, don't increase the metric worker.cacheBlockSize. ### Why are the changes needed? The metric Worker.CacheBlocksSize is not correct. If data already exists in Alluxio, caching the same block again will increate metric Worker.cacheBlockSize. 
### Does this PR introduce any user facing changes? no pr-link: Alluxio/alluxio#16791 change-id: cid-ac9ae8c5eaa85cb38fbb93cb40ad3de7cbacb213 --- .../worker/block/CacheRequestManager.java | 42 ++++++++++++------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java index e265f5e330cc..911954bd27c5 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java @@ -205,15 +205,20 @@ public boolean equals(Object obj) { public Void call() throws IOException, AlluxioException { long blockId = mRequest.getBlockId(); long blockLength = mRequest.getLength(); - boolean result = false; + CacheResult result = CacheResult.FAILED; try { result = cacheBlock(mRequest); } finally { - if (result) { - CACHE_BLOCKS_SIZE.inc(blockLength); - CACHE_SUCCEEDED_BLOCKS.inc(); - } else { - CACHE_FAILED_BLOCKS.inc(); + switch (result) { + case SUCCEED: + CACHE_BLOCKS_SIZE.inc(blockLength); + CACHE_SUCCEEDED_BLOCKS.inc(); + break; + case FAILED: + CACHE_FAILED_BLOCKS.inc(); + break; + default: + break; } mActiveCacheRequests.remove(blockId); } @@ -221,8 +226,13 @@ public Void call() throws IOException, AlluxioException { } } - private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioException { - boolean result; + enum CacheResult { + + SUCCEED, FAILED, ALREADY_CACHED + } + + private CacheResult cacheBlock(CacheRequest request) throws IOException, AlluxioException { + CacheResult result; boolean isSourceLocal = NetworkAddressUtils.isLocalAddress(request.getSourceHost(), NETWORK_HOST_RESOLUTION_TIMEOUT); long blockId = request.getBlockId(); @@ -230,7 +240,7 @@ private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioExce // Check if the block has already been cached on this 
worker if (mBlockWorker.getBlockStore().hasBlockMeta(blockId)) { LOG.debug("block already cached: {}", blockId); - return true; + return CacheResult.ALREADY_CACHED; } Protocol.OpenUfsBlockOptions openUfsBlockOptions = request.getOpenUfsBlockOptions(); // Depends on the request, cache the target block from different sources @@ -254,9 +264,9 @@ private boolean cacheBlock(CacheRequest request) throws IOException, AlluxioExce * @param blockId block ID * @param blockSize block size * @param openUfsBlockOptions options to open the UFS file - * @return if the block is cached + * @return cache result */ - private boolean cacheBlockFromUfs(long blockId, long blockSize, + private CacheResult cacheBlockFromUfs(long blockId, long blockSize, Protocol.OpenUfsBlockOptions openUfsBlockOptions) throws IOException { try (BlockReader reader = mBlockWorker.createUfsBlockReader( Sessions.CACHE_UFS_SESSION_ID, blockId, 0, false, openUfsBlockOptions)) { @@ -271,7 +281,7 @@ private boolean cacheBlockFromUfs(long blockId, long blockSize, offset += bufferSize; } } - return true; + return CacheResult.SUCCEED; } /** @@ -281,15 +291,15 @@ private boolean cacheBlockFromUfs(long blockId, long blockSize, * @param blockSize block size * @param sourceAddress the source to read the block previously by client * @param openUfsBlockOptions options to open the UFS file - * @return if the block is cached + * @return cache result */ - private boolean cacheBlockFromRemoteWorker(long blockId, long blockSize, + private CacheResult cacheBlockFromRemoteWorker(long blockId, long blockSize, InetSocketAddress sourceAddress, Protocol.OpenUfsBlockOptions openUfsBlockOptions) throws IOException { if (mBlockWorker.getBlockStore().hasBlockMeta(blockId) || mBlockWorker.getBlockStore().hasTempBlockMeta(blockId)) { // It is already cached - return true; + return CacheResult.ALREADY_CACHED; } mBlockWorker.createBlock(Sessions.CACHE_WORKER_SESSION_ID, blockId, 0, new CreateBlockOptions(null, "", blockSize)); @@ -300,7 
+310,7 @@ private boolean cacheBlockFromRemoteWorker(long blockId, long blockSize, .createBlockWriter(Sessions.CACHE_WORKER_SESSION_ID, blockId)) { BufferUtils.transfer(reader.getChannel(), writer.getChannel()); mBlockWorker.commitBlock(Sessions.CACHE_WORKER_SESSION_ID, blockId, false); - return true; + return CacheResult.SUCCEED; } catch (IllegalStateException | IOException e) { LOG.warn("Failed to async cache block {} from remote worker ({}) on copying the block: {}", blockId, sourceAddress, e.toString()); From a78513b3caf61897feca36f3742cf66157028a23 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Tue, 24 Jan 2023 17:49:59 -0800 Subject: [PATCH 084/334] Fix ufs contract test not outputting error code on failure The UnderFileSystemContractTest was outputting a successful output code even though some of the tests were failing. pr-link: Alluxio/alluxio#16817 change-id: cid-4f8e4ad51b71d36d799f7eef44e40e54dadaf00e --- .../main/java/alluxio/cli/UnderFileSystemContractTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemContractTest.java b/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemContractTest.java index d493756624bf..c13d79753c17 100644 --- a/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemContractTest.java +++ b/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemContractTest.java @@ -106,6 +106,10 @@ public void run() throws Exception { failedCnt += runS3Operations(); } System.out.printf("Tests completed with %d failed.%n", failedCnt); + if (failedCnt > 0) { + throw new RuntimeException(String.format("UFS contract test failed with %d failures", + failedCnt)); + } } /** From dbb737c4192a757bba5b5020eccd55cc0a7802b8 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Wed, 25 Jan 2023 12:53:47 -0800 Subject: [PATCH 085/334] Make stale labeling faster About 1 page of issues were 
labeled stale per week. At this rate we need 30 weeks to label all of old issues, which is too slow. pr-link: Alluxio/alluxio#16816 change-id: cid-c1b55d3b9c2bb29f4c861375d9c72d484b9b01ea --- .github/workflows/stale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index efc2237486b7..9e8fe31fadda 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -19,7 +19,7 @@ jobs: with: repo-token: ${{ secrets.GITHUB_TOKEN }} ascending: true # old issues/PRs first - operations-per-run: 100 # default is 30, enlarge for dealing with more issues/PRs + operations-per-run: 1000 # default is 30, enlarge for dealing with more issues/PRs days-before-stale: 30 days-before-close: -1 stale-issue-message: > From ee6c23f7c60fef6ccdcc64ecca0e2ade18b62725 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Wed, 25 Jan 2023 16:08:44 -0800 Subject: [PATCH 086/334] Add metrics for in memory data structures As mentioned in https://github.com/Alluxio/alluxio/issues/8991, some data structures about state of index in the master are stored in memory, which could cause memory issues. To know if this is happening, some metrics are added to compute the size of these structures. 
These already existed: Master.FilesPinned Master.FilesToBePersisted And these are added: Master.TTLBuckets Master.TTLInodes Master.ReplicationLimitedFiles For an example of how these new metrics work, can try something like: In Alluxio-site.properties set ```alluxio.master.ttl.checker.interval.ms=60000``` Then run ``` ./bin/alluxio runTests -Dalluxio.user.file.create.ttl=1m -Dalluxio.user.file.create.ttl.action=DELETE -Dalluxio.user.file.replication.max=1``` And watch ```watch -n 1 "curl 127.0.0.1:19999/metrics/json/ | grep -i -A 4 -e Master.TTL -e Master.ReplicationLimitedFiles"``` pr-link: Alluxio/alluxio#16818 change-id: cid-960d695f986eb6ee8b09c881869f383a98f9a499 --- .../main/java/alluxio/metrics/MetricKey.java | 27 ++++++++++++++++-- .../master/file/DefaultFileSystemMaster.java | 6 ++++ .../alluxio/master/file/meta/TtlBucket.java | 17 ++++++++--- .../master/file/meta/TtlBucketList.java | 28 +++++++++++++++++-- 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index 6297193d426f..50a2f7db7cfa 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -322,12 +322,14 @@ public static String getSyncMetricName(long mountId) { // Master file statistics public static final MetricKey MASTER_FILES_PINNED = new Builder("Master.FilesPinned") - .setDescription("Total number of currently pinned files") + .setDescription("Total number of currently pinned files. " + + "Note that IDs for these files are stored in memory.") .setMetricType(MetricType.GAUGE) .build(); public static final MetricKey MASTER_FILES_TO_PERSIST = new Builder("Master.FilesToBePersisted") - .setDescription("Total number of currently to be persisted files") + .setDescription("Total number of currently to be persisted files." 
+ + " Note that the IDs for these files are stored in memory.") .setMetricType(MetricType.GAUGE) .build(); public static final MetricKey MASTER_FILE_SIZE = @@ -420,6 +422,20 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of block replicas in Alluxio") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_TTL_BUCKETS = + new Builder("Master.TTLBuckets") + .setDescription("The number of TTL buckets at the master. Note that these buckets" + + " are stored in memory.") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_TTL_INODES = + new Builder("Master.TTLInodes") + .setDescription("The total number of inodes contained in TTL buckets at the mater." + + " Note that these inodes are stored in memory.") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); public static final MetricKey MASTER_INODE_HEAP_SIZE = new Builder("Master.InodeHeapSize") .setDescription("An estimate of the inode heap size") @@ -617,6 +633,13 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of Mount operations") .setMetricType(MetricType.COUNTER) .build(); + public static final MetricKey MASTER_REPLICATION_LIMITED_FILES = + new Builder("Master.ReplicationLimitedFiles") + .setDescription("Number of files that have a replication count set to a " + + "non-default value. 
Note that these files have IDs that are stored " + + "in memory.") + .setMetricType(MetricType.COUNTER) + .build(); public static final MetricKey MASTER_RENAME_PATH_OPS = new Builder("Master.RenamePathOps") .setDescription("Total number of Rename operations") diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 19e1656ce20e..cefd5fc72d38 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -5145,6 +5145,12 @@ public static void registerGauges(final UfsManager ufsManager, final InodeTree i inodeTree::getPinnedSize); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_FILES_TO_PERSIST.getName(), () -> inodeTree.getToBePersistedIds().size()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_REPLICATION_LIMITED_FILES.getName(), + () -> inodeTree.getReplicationLimitedFileIds().size()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TTL_BUCKETS.getName(), + () -> inodeTree.getTtlBuckets().getNumBuckets()); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TTL_INODES.getName(), + () -> inodeTree.getTtlBuckets().getNumInodes()); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_TOTAL_PATHS.getName(), inodeTree::getInodeCount); MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_FILE_SIZE.getName(), diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java index eda16f7dc6a8..ebfbe7d7ee79 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java @@ -89,18 +89,27 @@ public Collection getInodes() { * Adds a inode to the bucket. 
* * @param inode the inode to be added + * @return true if a new inode was added to the bucket */ - public void addInode(Inode inode) { - mInodes.put(inode.getId(), inode); + public boolean addInode(Inode inode) { + return mInodes.put(inode.getId(), inode) == null; } /** * Removes a inode from the bucket. * * @param inode the inode to be removed + * @return true if a inode was removed */ - public void removeInode(InodeView inode) { - mInodes.remove(inode.getId()); + public boolean removeInode(InodeView inode) { + return mInodes.remove(inode.getId()) != null; + } + + /** + * @return the number of inodes in the bucket + */ + public int size() { + return mInodes.size(); } /** diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java index a815117e725a..e68b293b08b0 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java @@ -29,6 +29,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.atomic.AtomicLong; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -48,6 +49,7 @@ public final class TtlBucketList implements Checkpointed { */ private final ConcurrentSkipListSet mBucketList; private final ReadOnlyInodeStore mInodeStore; + private final AtomicLong mNumInodes = new AtomicLong(); /** * Creates a new list of {@link TtlBucket}s. 
@@ -59,6 +61,20 @@ public TtlBucketList(ReadOnlyInodeStore inodeStore) { mBucketList = new ConcurrentSkipListSet<>(); } + /** + * @return the number of TTL buckets + */ + public int getNumBuckets() { + return mBucketList.size(); + } + + /** + * @return the total number of inodes in all the buckets + */ + public long getNumInodes() { + return mNumInodes.get(); + } + /** * Gets the bucket in the list that contains the inode. * @@ -123,7 +139,9 @@ public void insert(Inode inode) { // TODO(zhouyufa): Consider the concurrent situation that the bucket is expired and processed by // the InodeTtlChecker, then adding the inode into the bucket is meaningless since the bucket // will not be accessed again. (c.f. ALLUXIO-2821) - bucket.addInode(inode); + if (bucket.addInode(inode)) { + mNumInodes.incrementAndGet(); + } } /** @@ -140,7 +158,9 @@ public void insert(Inode inode) { public void remove(InodeView inode) { TtlBucket bucket = getBucketContaining(inode); if (bucket != null) { - bucket.removeInode(inode); + if (bucket.removeInode(inode)) { + mNumInodes.decrementAndGet(); + } } } @@ -163,6 +183,9 @@ public Set getExpiredBuckets(long time) { */ public void removeBuckets(Set buckets) { mBucketList.removeAll(buckets); + for (TtlBucket nxt : buckets) { + mNumInodes.addAndGet(-nxt.size()); + } } @Override @@ -183,6 +206,7 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { mBucketList.clear(); + mNumInodes.set(0); Preconditions.checkState(input.getType() == CheckpointType.LONGS, "Unexpected checkpoint type: %s", input.getType()); while (true) { From e81b610b6c48795633b668b233afb53b18e96779 Mon Sep 17 00:00:00 2001 From: jja725 Date: Thu, 26 Jan 2023 13:01:25 -0800 Subject: [PATCH 087/334] [DOCFIX] Update new contributor doc ### What changes are proposed in this pull request? Point to new contributor tasks since old one is abandoned. 
### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16820 change-id: cid-818ec3cfb6fff04b8643a90e4b73e9455e9a19d7 --- docs/en/contributor/Contributor-Getting-Started.md | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/en/contributor/Contributor-Getting-Started.md b/docs/en/contributor/Contributor-Getting-Started.md index 5ae5bfda4235..417a3fd2cce7 100644 --- a/docs/en/contributor/Contributor-Getting-Started.md +++ b/docs/en/contributor/Contributor-Getting-Started.md @@ -119,7 +119,7 @@ All new contributors are recommended to resolve one and only one **New Contribut larger tasks. This is a good way to familiarize yourself with the entire process of contributing to the Alluxio project. -Browse any of the open [New Contributor Alluxio Tasks](https://github.com/Alluxio/new-contributor-tasks/issues) +Browse any of the open [New Contributor Alluxio Tasks](https://github.com/Alluxio/alluxio/labels/task-good-first-issue) and find one that is unassigned. In order to assign an issue to yourself, leave a comment in the issue like `/assign @yourUserName` to indicate that you are working on the issue. @@ -130,14 +130,7 @@ Notice that all New Contributor issues on Github are assigned with a number. The found after the issue title, like `#123`. When you create a pull request to address the issue, you should add a link/pointer back to the issue itself. In order to do that you have to add certain text in the pull request description. -For example, if your issue number is `#123`, you should include one of the following in your -pull request description. 
- * `Fixes Alluxio/new-contributor-tasks#123` - * `Fixed Alluxio/new-contributor-tasks#123` - * `Fix Alluxio/new-contributor-tasks#123` - * `Closes Alluxio/new-contributor-tasks#123` - * `Closed Alluxio/new-contributor-tasks#123` - * `Close Alluxio/new-contributor-tasks#123` +For example, if your issue number is `#123`, the PR description should include `Fix new contributor task Alluxio/alluxio#123`. ### Creating a Branch in your Clone From 0b4796f5482e362efe454abf2935e88fef1b69a9 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Sat, 28 Jan 2023 04:18:08 +0800 Subject: [PATCH 088/334] Refactor s3 low level output stream and support OSS and OBS ### What changes are proposed in this pull request? Refactor s3 low level output stream and support OSS and OBS. ### Why are the changes needed? 1. extract the generic logic to `ObjectLowLevelOutputStream` to make it easy to support new object storage. 2. Support streaming uploads for OBS and OSS. 3. fix the bug that empty files cannot be persisted to UFS. 4. specify MD5 when upload parts. ### Does this PR introduce any user facing changes? `alluxio.underfs.oss.intermediate.upload.clean.age`: clean incomplete multi abort age for OSS. `alluxio.underfs.oss.streaming.upload.enabled`: Whether to enable stream upload for OSS. `alluxio.underfs.oss.streaming.upload.partition.size`: straming upload partition size for OSS. `alluxio.underfs.oss.streaming.upload.threads`: thread pool size for OSS streaming upload. `alluxio.underfs.obs.intermediate.upload.clean.age`: clean incomplete multi abort age for obs. `alluxio.underfs.obs.streaming.upload.enabled`: Whether to enable stream upload for OBS. `alluxio.underfs.obs.streaming.upload.partition.size`: straming upload partition size for OBS. `alluxio.underfs.obs.streaming.upload.threads`: thread pool size for OBS streaming upload. 
pr-link: Alluxio/alluxio#16122 change-id: cid-91f6e2b5ec6b79d2175e71754654b59e650c0c32 --- .../main/java/alluxio/conf/PropertyKey.java | 94 ++++ .../underfs/ObjectLowLevelOutputStream.java | 395 ++++++++++++++ .../underfs/obs/OBSLowLevelOutputStream.java | 184 +++++++ .../underfs/obs/OBSUnderFileSystem.java | 39 ++ .../obs/OBSLowLevelOutputStreamTest.java | 219 ++++++++ .../underfs/oss/OSSLowLevelOutputStream.java | 172 ++++++ .../underfs/oss/OSSUnderFileSystem.java | 44 ++ .../oss/OSSLowLevelOutputStreamTest.java | 219 ++++++++ .../underfs/s3a/S3ALowLevelOutputStream.java | 508 ++++-------------- .../underfs/s3a/S3AUnderFileSystem.java | 8 +- .../s3a/S3ALowLevelOutputStreamTest.java | 52 +- 11 files changed, 1513 insertions(+), 421 deletions(-) create mode 100644 core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java create mode 100644 underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java create mode 100644 underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java create mode 100644 underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java create mode 100644 underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 8c6c42852c18..693661aa3d09 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -1223,6 +1223,12 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT = + durationBuilder(Name.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT) + .setDescription("Timeout for uploading part when using streaming uploads.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + 
.build(); public static final PropertyKey UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED = booleanBuilder(Name.UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED) .setDefaultValue(true) @@ -1755,6 +1761,41 @@ public String toString() { .setScope(Scope.SERVER) .setDisplayType(DisplayType.CREDENTIALS) .build(); + public static final PropertyKey UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + durationBuilder(Name.UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE) + .setDefaultValue("3day") + .setDescription("Streaming uploads may not have been completed/aborted correctly " + + "and need periodical ufs cleanup. If ufs cleanup is enabled, " + + "intermediate multipart uploads in all non-readonly OSS mount points " + + "older than this age will be cleaned. This may impact other " + + "ongoing upload operations, so a large clean age is encouraged.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_ENABLED = + booleanBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If true, using streaming upload to write to OSS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE = + dataSizeBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE) + .setDefaultValue("64MB") + .setDescription("Maximum allowable size of a single buffer file when using " + + "OSS streaming upload. When the buffer file reaches the partition size, " + + "it will be uploaded and the upcoming data will write to other buffer files." + + "If the partition size is too small, OSS upload speed might be affected. 
") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_STREAMING_UPLOAD_THREADS = + intBuilder(Name.UNDERFS_OSS_STREAMING_UPLOAD_THREADS) + .setDefaultValue(20) + .setDescription("the number of threads to use for streaming upload data to OSS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey S3A_ACCESS_KEY = stringBuilder(Name.S3A_ACCESS_KEY) .setAlias(Name.AWS_ACCESS_KEY) .setDescription("The access key of S3 bucket.") @@ -1906,6 +1947,41 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + durationBuilder(Name.UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE) + .setDefaultValue("3day") + .setDescription("Streaming uploads may not have been completed/aborted correctly " + + "and need periodical ufs cleanup. If ufs cleanup is enabled, " + + "intermediate multipart uploads in all non-readonly OBS mount points " + + "older than this age will be cleaned. This may impact other " + + "ongoing upload operations, so a large clean age is encouraged.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_ENABLED = + booleanBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If true, using streaming upload to write to OBS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE = + dataSizeBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE) + .setDefaultValue("64MB") + .setDescription("Maximum allowable size of a single buffer file when using " + + "S3A streaming upload. 
When the buffer file reaches the partition size, " + + "it will be uploaded and the upcoming data will write to other buffer files." + + "If the partition size is too small, OBS upload speed might be affected. ") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OBS_STREAMING_UPLOAD_THREADS = + intBuilder(Name.UNDERFS_OBS_STREAMING_UPLOAD_THREADS) + .setDefaultValue(20) + .setDescription("the number of threads to use for streaming upload data to OBS.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); // // Mount table related properties // @@ -7427,6 +7503,8 @@ public static final class Name { public static final String UNDERFS_WEB_PARENT_NAMES = "alluxio.underfs.web.parent.names"; public static final String UNDERFS_WEB_TITLES = "alluxio.underfs.web.titles"; public static final String UNDERFS_VERSION = "alluxio.underfs.version"; + public static final String UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT = + "alluxio.underfs.object.store.streaming.upload.part.timeout"; public static final String UNDERFS_OBJECT_STORE_BREADCRUMBS_ENABLED = "alluxio.underfs.object.store.breadcrumbs.enabled"; public static final String UNDERFS_OBJECT_STORE_SERVICE_THREADS = @@ -7449,6 +7527,14 @@ public static final class Name { public static final String UNDERFS_OSS_STS_ENABLED = "alluxio.underfs.oss.sts.enabled"; public static final String UNDERFS_OSS_STS_TOKEN_REFRESH_INTERVAL_MS = "alluxio.underfs.oss.sts.token.refresh.interval.ms"; + public static final String UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + "alluxio.underfs.oss.intermediate.upload.clean.age"; + public static final String UNDERFS_OSS_STREAMING_UPLOAD_ENABLED = + "alluxio.underfs.oss.streaming.upload.enabled"; + public static final String UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE = + "alluxio.underfs.oss.streaming.upload.partition.size"; + public static final String 
UNDERFS_OSS_STREAMING_UPLOAD_THREADS = + "alluxio.underfs.oss.streaming.upload.threads"; public static final String UNDERFS_S3_BULK_DELETE_ENABLED = "alluxio.underfs.s3.bulk.delete.enabled"; public static final String UNDERFS_S3_DEFAULT_MODE = "alluxio.underfs.s3.default.mode"; @@ -7520,6 +7606,14 @@ public static final class Name { "alluxio.underfs.cephfs.mount.point"; public static final String UNDERFS_CEPHFS_LOCALIZE_READS = "alluxio.underfs.cephfs.localize.reads"; + public static final String UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE = + "alluxio.underfs.obs.intermediate.upload.clean.age"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_ENABLED = + "alluxio.underfs.obs.streaming.upload.enabled"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE = + "alluxio.underfs.obs.streaming.upload.partition.size"; + public static final String UNDERFS_OBS_STREAMING_UPLOAD_THREADS = + "alluxio.underfs.obs.streaming.upload.threads"; // // UFS access control related properties diff --git a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java new file mode 100644 index 000000000000..9ba2573916e3 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java @@ -0,0 +1,395 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.underfs; + +import alluxio.Constants; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.retry.CountingRetry; +import alluxio.retry.RetryPolicy; +import alluxio.retry.RetryUtils; +import alluxio.util.CommonUtils; +import alluxio.util.io.PathUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.commons.codec.binary.Base64; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * [Experimental] A stream for writing a file into object storage using streaming upload. + * The data transfer is done using object storage low-level multipart upload. + *

+ * We upload data in partitions. When write(), the data will be persisted to + * a temporary file {@link #mFile} on the local disk. When the data {@link #mPartitionOffset} + * in this temporary file reaches the {@link #mPartitionSize}, the file will be submitted + * to the upload executor {@link #mExecutor} and we do not wait for uploads to finish. + * A new temp file will be created for the future write and the {@link #mPartitionOffset} + * will be reset to zero. The process goes until all the data has been written to temp files. + *

+ * In flush(), we upload the buffered data if they are bigger than 5MB + * and wait for all uploads to finish. The temp files will be deleted after uploading successfully. + *

+ * In close(), we upload the last part of data (if exists), wait for all uploads to finish, + * and complete the multipart upload. + *

+ * close() will not be retried, but all the multipart upload + * related operations(init, upload, complete, and abort) will be retried. + *

+ * If an error occurs and we have no way to recover, we abort the multipart uploads. + * Some multipart uploads may not be completed/aborted in normal ways and need periodical cleanup + * by enabling the {@link PropertyKey#UNDERFS_CLEANUP_ENABLED}. + * When a leader master starts or a cleanup interval is reached, all the multipart uploads + * older than clean age will be cleaned. + */ +@NotThreadSafe +public abstract class ObjectLowLevelOutputStream extends OutputStream { + protected static final Logger LOG = LoggerFactory.getLogger(ObjectLowLevelOutputStream.class); + + protected final List mTmpDirs; + + /** + * Only parts bigger than 5MB could be uploaded through multipart upload, + * except the last part. + */ + protected static final long UPLOAD_THRESHOLD = 5L * Constants.MB; + + /** Bucket name of the object storage bucket. */ + protected final String mBucketName; + + /** Key of the file when it is uploaded to object storage. */ + protected final String mKey; + + /** The retry policy of this multipart upload. */ + protected final RetryPolicy mRetryPolicy = new CountingRetry(5); + + /** Pre-allocated byte buffer for writing single characters. */ + protected final byte[] mSingleCharWrite = new byte[1]; + + /** The MD5 hash of the file. */ + @Nullable + protected MessageDigest mHash; + + /** Flag to indicate this stream has been closed, to ensure close is only done once. */ + protected boolean mClosed = false; + + /** When the offset reaches the partition size, we upload the temp file. */ + protected long mPartitionOffset; + /** The maximum allowed size of a partition. */ + protected final long mPartitionSize; + + /** + * The local temp file that will be uploaded when reaches the partition size + * or when flush() is called and this file is bigger than {@link #UPLOAD_THRESHOLD}. + */ + @Nullable + protected File mFile; + /** The output stream to the local temp file. 
*/ + @Nullable + protected OutputStream mLocalOutputStream; + + /** + * Give each upload request a unique and continuous id + * so that object storage knows the part sequence to concatenate the parts to a single object. + */ + private final AtomicInteger mPartNumber; + + /** Executing the upload tasks. */ + private final ListeningExecutorService mExecutor; + + /** Store the future of tags. */ + private final List> mFutures = new ArrayList<>(); + + /** upload part timeout, null means no timeout. */ + @Nullable + private Long mUploadPartTimeoutMills; + + /** Whether the multi upload has been initialized. */ + private boolean mMultiPartUploadInitialized = false; + + /** + * Constructs a new stream for writing a file. + * + * @param bucketName the name of the bucket + * @param key the key of the file + * @param streamingUploadPartitionSize the size in bytes for partitions of streaming uploads + * @param executor executor + * @param ufsConf the object store under file system configuration + */ + public ObjectLowLevelOutputStream( + String bucketName, + String key, + ListeningExecutorService executor, + long streamingUploadPartitionSize, + AlluxioConfiguration ufsConf) { + Preconditions.checkArgument(bucketName != null && !bucketName.isEmpty(), + "Bucket name must not be null or empty."); + mBucketName = bucketName; + mTmpDirs = ufsConf.getList(PropertyKey.TMP_DIRS); + Preconditions.checkArgument(!mTmpDirs.isEmpty(), "No temporary directories available"); + mExecutor = executor; + mKey = key; + initHash(); + mPartitionSize = Math.max(UPLOAD_THRESHOLD, streamingUploadPartitionSize); + mPartNumber = new AtomicInteger(1); + if (ufsConf.isSet(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT)) { + mUploadPartTimeoutMills = + ufsConf.getDuration(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT) + .toMillis(); + } + } + + @Override + public void write(int b) throws IOException { + mSingleCharWrite[0] = (byte) b; + write(mSingleCharWrite); + } + + 
@Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (b == null || len == 0) { + return; + } + Preconditions.checkNotNull(b); + Preconditions.checkArgument(off >= 0 && off <= b.length && len >= 0 && off + len <= b.length); + if (mFile == null) { + initNewFile(); + } + if (mPartitionOffset + len <= mPartitionSize) { + mLocalOutputStream.write(b, off, len); + mPartitionOffset += len; + } else { + int firstLen = (int) (mPartitionSize - mPartitionOffset); + mLocalOutputStream.write(b, off, firstLen); + mPartitionOffset += firstLen; + uploadPart(); + write(b, off + firstLen, len - firstLen); + } + } + + @Override + public void flush() throws IOException { + if (!mMultiPartUploadInitialized) { + return; + } + // We try to minimize the time use to close() + // because Fuse release() method which calls close() is async. + // In flush(), we upload the current writing file if it is bigger than 5 MB, + // and wait for all current upload to complete. + if (mLocalOutputStream != null) { + mLocalOutputStream.flush(); + } + if (mPartitionOffset > UPLOAD_THRESHOLD) { + uploadPart(); + } + waitForAllPartsUpload(); + } + + @Override + public void close() throws IOException { + if (mClosed) { + return; + } + + // Set the closed flag, we never retry close() even if exception occurs + mClosed = true; + + // Multi-part upload has not been initialized + if (!mMultiPartUploadInitialized) { + if (mFile == null) { + LOG.debug("Streaming upload output stream closed without uploading any data."); + RetryUtils.retry("put empty object for key" + mKey, () -> createEmptyObject(mKey), + mRetryPolicy); + } else { + try { + mLocalOutputStream.close(); + final String md5 = mHash != null ? 
Base64.encodeBase64String(mHash.digest()) : null; + RetryUtils.retry("put object for key" + mKey, () -> putObject(mKey, mFile, md5), + mRetryPolicy); + } finally { + if (!mFile.delete()) { + LOG.error("Failed to delete temporary file @ {}", mFile.getPath()); + } + } + } + return; + } + + try { + if (mFile != null) { + mLocalOutputStream.close(); + int partNumber = mPartNumber.getAndIncrement(); + uploadPart(mFile, partNumber, true); + } + + waitForAllPartsUpload(); + RetryUtils.retry("complete multipart upload", + this::completeMultiPartUploadInternal, mRetryPolicy); + } catch (Exception e) { + LOG.error("Failed to upload {}", mKey, e); + throw new IOException(e); + } + } + + /** + * Creates a new temp file to write to. + */ + private void initNewFile() throws IOException { + mFile = new File(PathUtils.concatPath(CommonUtils.getTmpDir(mTmpDirs), UUID.randomUUID())); + initHash(); + if (mHash != null) { + mLocalOutputStream = + new BufferedOutputStream(new DigestOutputStream(new FileOutputStream(mFile), mHash)); + } else { + mLocalOutputStream = new BufferedOutputStream(new FileOutputStream(mFile)); + } + mPartitionOffset = 0; + LOG.debug("Init new temp file @ {}", mFile.getPath()); + } + + private void initHash() { + try { + mHash = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + LOG.warn("Algorithm not available for MD5 hash.", e); + mHash = null; + } + } + + /** + * Uploads part async. 
+ */ + protected void uploadPart() throws IOException { + if (mFile == null) { + return; + } + if (!mMultiPartUploadInitialized) { + RetryUtils.retry("init multipart upload", this::initMultiPartUploadInternal, mRetryPolicy); + mMultiPartUploadInitialized = true; + } + mLocalOutputStream.close(); + int partNumber = mPartNumber.getAndIncrement(); + uploadPart(new File(mFile.getPath()), partNumber, false); + mFile = null; + mLocalOutputStream = null; + } + + protected void uploadPart(File file, int partNumber, boolean lastPart) { + final String md5 = mHash != null ? Base64.encodeBase64String(mHash.digest()) : null; + Callable callable = () -> { + try { + RetryUtils.retry("upload part for key " + mKey + " and part number " + partNumber, + () -> uploadPartInternal(file, partNumber, lastPart, md5), mRetryPolicy); + return null; + } finally { + // Delete the uploaded or failed to upload file + if (!file.delete()) { + LOG.error("Failed to delete temporary file @ {}", file.getPath()); + } + } + }; + ListenableFuture futureTag = mExecutor.submit(callable); + mFutures.add(futureTag); + LOG.debug( + "Submit upload part request. 
key={}, partNum={}, file={}, fileSize={}, lastPart={}.", + mKey, partNumber, file.getPath(), file.length(), lastPart); + } + + protected void abortMultiPartUpload() throws IOException { + RetryUtils.retry("abort multipart upload for key " + mKey, this::abortMultiPartUploadInternal, + mRetryPolicy); + } + + protected void waitForAllPartsUpload() throws IOException { + try { + for (ListenableFuture future : mFutures) { + if (mUploadPartTimeoutMills == null) { + future.get(); + } else { + future.get(mUploadPartTimeoutMills, TimeUnit.MILLISECONDS); + } + } + } catch (ExecutionException e) { + // No recover ways so that we need to cancel all the upload tasks + // and abort the multipart upload + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + throw new IOException( + "Part upload failed in multipart upload with to " + mKey, e); + } catch (InterruptedException e) { + LOG.warn("Interrupted object upload.", e); + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + Thread.currentThread().interrupt(); + } catch (TimeoutException e) { + LOG.error("timeout when upload part"); + Futures.allAsList(mFutures).cancel(true); + abortMultiPartUpload(); + throw new IOException("timeout when upload part " + mKey, e); + } + mFutures.clear(); + } + + /** + * Get the part number. 
+ * @return the part number + */ + @VisibleForTesting + public int getPartNumber() { + return mPartNumber.get(); + } + + protected abstract void uploadPartInternal( + File file, + int partNumber, + boolean isLastPart, + @Nullable String md5) + throws IOException; + + protected abstract void initMultiPartUploadInternal() throws IOException; + + protected abstract void completeMultiPartUploadInternal() throws IOException; + + protected abstract void abortMultiPartUploadInternal() throws IOException; + + protected abstract void createEmptyObject(String key) throws IOException; + + protected abstract void putObject(String key, File file, @Nullable String md5) throws IOException; +} diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java new file mode 100644 index 000000000000..77daa708a20f --- /dev/null +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java @@ -0,0 +1,184 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.underfs.obs; + +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.underfs.ObjectLowLevelOutputStream; + +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.obs.services.IObsClient; +import com.obs.services.exception.ObsException; +import com.obs.services.model.AbortMultipartUploadRequest; +import com.obs.services.model.CompleteMultipartUploadRequest; +import com.obs.services.model.InitiateMultipartUploadRequest; +import com.obs.services.model.ObjectMetadata; +import com.obs.services.model.PartEtag; +import com.obs.services.model.PutObjectRequest; +import com.obs.services.model.UploadPartRequest; +import com.obs.services.model.UploadPartResult; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nullable; + +/** + * {@link ObjectLowLevelOutputStream} implement for OBS. + */ +public class OBSLowLevelOutputStream extends ObjectLowLevelOutputStream { + private static final Logger LOG = LoggerFactory.getLogger(OBSLowLevelOutputStream.class); + + /** The OBS client to interact with OBS. */ + private final IObsClient mClient; + + /** Tags for the uploaded part, provided by OBS after uploading. */ + private final List mTags = + Collections.synchronizedList(new ArrayList<>()); + + /** + * The upload id of this multipart upload. + */ + protected volatile String mUploadId; + + /** + * Constructs a new stream for writing a file. 
+ * + * @param bucketName the name of the bucket + * @param key the key of the file + * @param obsClient the OBS client to upload the file with + * @param executor a thread pool executor + * @param ufsConf the object store under file system configuration + */ + public OBSLowLevelOutputStream( + String bucketName, + String key, + IObsClient obsClient, + ListeningExecutorService executor, + AlluxioConfiguration ufsConf) { + super(bucketName, key, executor, + ufsConf.getBytes(PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE), ufsConf); + mClient = Preconditions.checkNotNull(obsClient); + } + + @Override + protected void uploadPartInternal( + File file, + int partNumber, + boolean isLastPart, + @Nullable String md5) + throws IOException { + try { + final UploadPartRequest uploadRequest = new UploadPartRequest(); + uploadRequest.setBucketName(mBucketName); + uploadRequest.setObjectKey(mKey); + uploadRequest.setUploadId(mUploadId); + uploadRequest.setPartNumber(partNumber); + uploadRequest.setFile(file); + uploadRequest.setPartSize(file.length()); + if (md5 != null) { + uploadRequest.setContentMd5(md5); + } + UploadPartResult result = getClient().uploadPart(uploadRequest); + mTags.add(new PartEtag(result.getEtag(), result.getPartNumber())); + } catch (ObsException e) { + LOG.debug("failed to upload part. part number: {} upload id: {}", partNumber, mUploadId, e); + throw new IOException(String.format( + "failed to upload part. 
key: %s part number: %s uploadId: %s", + mKey, partNumber, mUploadId), e); + } + } + + @Override + protected void initMultiPartUploadInternal() throws IOException { + try { + ObjectMetadata meta = new ObjectMetadata(); + InitiateMultipartUploadRequest request = + new InitiateMultipartUploadRequest(mBucketName, mKey); + request.setMetadata(meta); + mUploadId = getClient().initiateMultipartUpload(request).getUploadId(); + } catch (ObsException e) { + LOG.debug("failed to init multi part upload", e); + throw new IOException("failed to init multi part upload", e); + } + } + + @Override + protected void completeMultiPartUploadInternal() throws IOException { + try { + LOG.debug("complete multi part {}", mUploadId); + CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest( + mBucketName, mKey, mUploadId, mTags); + getClient().completeMultipartUpload(completeRequest); + } catch (ObsException e) { + LOG.debug("failed to complete multi part upload", e); + throw new IOException( + String.format("failed to complete multi part upload, key: %s, upload id: %s", + mKey, mUploadId) + e); + } + } + + @Override + protected void abortMultiPartUploadInternal() throws IOException { + try { + AbortMultipartUploadRequest request = + new AbortMultipartUploadRequest(mBucketName, mKey, mUploadId); + getClient().abortMultipartUpload(request); + } catch (ObsException e) { + LOG.debug("failed to abort multi part upload", e); + throw new IOException( + String.format("failed to complete multi part upload, key: %s, upload id: %s", mKey, + mUploadId), e); + } + } + + @Override + protected void createEmptyObject(String key) throws IOException { + try { + ObjectMetadata meta = new ObjectMetadata(); + meta.setContentLength(0L); + PutObjectRequest request = + new PutObjectRequest(mBucketName, key, new ByteArrayInputStream(new byte[0])); + request.setMetadata(meta); + getClient().putObject(request); + } catch (ObsException e) { + throw new IOException(e); + } + } + + 
@Override + protected void putObject(String key, File file, @Nullable String md5) throws IOException { + try { + ObjectMetadata meta = new ObjectMetadata(); + meta.setContentLength(file.length()); + if (md5 != null) { + meta.setContentMd5(md5); + } + PutObjectRequest request = + new PutObjectRequest(mBucketName, key, file); + request.setMetadata(meta); + getClient().putObject(request); + } catch (ObsException e) { + throw new IOException(e); + } + } + + protected IObsClient getClient() { + return mClient; + } +} diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java index 8b2fe277cdae..85b35528eabe 100644 --- a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java @@ -20,12 +20,20 @@ import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.underfs.options.OpenOptions; import alluxio.util.UnderFileSystemUtils; +import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.io.PathUtils; import com.google.common.base.Preconditions; +import com.google.common.base.Suppliers; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; import com.obs.services.ObsClient; import com.obs.services.exception.ObsException; +import com.obs.services.model.AbortMultipartUploadRequest; +import com.obs.services.model.ListMultipartUploadsRequest; import com.obs.services.model.ListObjectsRequest; +import com.obs.services.model.MultipartUpload; +import com.obs.services.model.MultipartUploadListing; import com.obs.services.model.ObjectListing; import com.obs.services.model.ObjectMetadata; import com.obs.services.model.ObsObject; @@ -40,6 +48,8 @@ import java.io.OutputStream; import java.util.Date; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; import 
javax.annotation.concurrent.ThreadSafe; /** @@ -65,6 +75,7 @@ public class OBSUnderFileSystem extends ObjectUnderFileSystem { private final String mBucketName; private final String mBucketType; + private final Supplier mStreamingUploadExecutor; /** * Constructs a new instance of {@link OBSUnderFileSystem}. @@ -107,10 +118,34 @@ protected OBSUnderFileSystem(AlluxioURI uri, ObsClient obsClient, String bucketN mClient = obsClient; mBucketName = bucketName; mBucketType = bucketType; + mStreamingUploadExecutor = Suppliers.memoize(() -> { + int numTransferThreads = + conf.getInt(PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_THREADS); + ExecutorService service = ExecutorServiceFactories + .fixedThreadPool("alluxio-obs-streaming-upload-worker", + numTransferThreads).create(); + return MoreExecutors.listeningDecorator(service); + }); } @Override public void cleanup() { + long cleanAge = mUfsConf.getMs(PropertyKey.UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE); + Date cleanBefore = new Date(new Date().getTime() - cleanAge); + MultipartUploadListing uploadListing = mClient.listMultipartUploads( + new ListMultipartUploadsRequest(mBucketName)); + do { + for (MultipartUpload upload : uploadListing.getMultipartTaskList()) { + if (upload.getInitiatedDate().compareTo(cleanBefore) < 0) { + mClient.abortMultipartUpload(new AbortMultipartUploadRequest( + mBucketName, upload.getObjectKey(), upload.getUploadId())); + } + } + ListMultipartUploadsRequest request = new ListMultipartUploadsRequest(mBucketName); + request.setUploadIdMarker(uploadListing.getNextUploadIdMarker()); + request.setKeyMarker(uploadListing.getKeyMarker()); + uploadListing = mClient.listMultipartUploads(request); + } while (uploadListing.isTruncated()); } @Override @@ -156,6 +191,10 @@ public boolean createEmptyObject(String key) { @Override protected OutputStream createObject(String key) throws IOException { + if (mUfsConf.getBoolean(PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_ENABLED)) { + return new 
OBSLowLevelOutputStream(mBucketName, key, mClient, + mStreamingUploadExecutor.get(), mUfsConf); + } return new OBSOutputStream(mBucketName, key, mClient, mUfsConf.getList(PropertyKey.TMP_DIRS)); } diff --git a/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java new file mode 100644 index 000000000000..d544f1101aa3 --- /dev/null +++ b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java @@ -0,0 +1,219 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.underfs.obs; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.util.FormatUtils; + +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.obs.services.IObsClient; +import com.obs.services.model.CompleteMultipartUploadRequest; +import com.obs.services.model.CompleteMultipartUploadResult; +import com.obs.services.model.InitiateMultipartUploadRequest; +import com.obs.services.model.InitiateMultipartUploadResult; +import com.obs.services.model.PartEtag; +import com.obs.services.model.PutObjectRequest; +import com.obs.services.model.PutObjectResult; +import com.obs.services.model.UploadPartRequest; +import com.obs.services.model.UploadPartResult; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.powermock.api.mockito.PowerMockito; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.security.DigestOutputStream; +import java.util.HashMap; +import java.util.concurrent.Callable; + +/** + * Unit tests for the {@link OBSLowLevelOutputStream}. 
+ */ +@RunWith(PowerMockRunner.class) +@PrepareForTest(OBSLowLevelOutputStream.class) +@SuppressWarnings("unchecked") +public class OBSLowLevelOutputStreamTest { + private static final String BUCKET_NAME = "testBucket"; + private static final String PARTITION_SIZE = "8MB"; + private static final String KEY = "testKey"; + private static final String UPLOAD_ID = "testUploadId"; + private static InstancedConfiguration sConf = Configuration.modifiableGlobal(); + + private IObsClient mMockObsClient; + private ListeningExecutorService mMockExecutor; + private BufferedOutputStream mMockOutputStream; + private ListenableFuture mMockTag; + + private OBSLowLevelOutputStream mStream; + + /** + * Sets the properties and configuration before each test runs. + */ + @Before + public void before() throws Exception { + mockOSSClientAndExecutor(); + mockFileAndOutputStream(); + sConf.set(PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_PARTITION_SIZE, PARTITION_SIZE); + mStream = new OBSLowLevelOutputStream(BUCKET_NAME, KEY, mMockObsClient, mMockExecutor, sConf); + } + + @Test + public void writeByte() throws Exception { + mStream.write(1); + + mStream.close(); + Mockito.verify(mMockOutputStream).write(new byte[] {1}, 0, 1); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockObsClient).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockObsClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockObsClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void writeByteArrayForSmallFile() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[partSize]; + + mStream.write(b, 0, b.length); + Mockito.verify(mMockOutputStream).write(b, 0, b.length); + + mStream.close(); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + 
Mockito.verify(mMockObsClient).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockObsClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockObsClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void writeByteArrayForLargeFile() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[partSize + 1]; + Assert.assertEquals(mStream.getPartNumber(), 1); + mStream.write(b, 0, b.length); + Assert.assertEquals(mStream.getPartNumber(), 2); + Mockito.verify(mMockObsClient) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOutputStream).write(b, 0, b.length - 1); + Mockito.verify(mMockOutputStream).write(b, b.length - 1, 1); + Mockito.verify(mMockExecutor).submit(any(Callable.class)); + + mStream.close(); + Assert.assertEquals(mStream.getPartNumber(), 3); + Mockito.verify(mMockObsClient) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void createEmptyFile() throws Exception { + mStream.close(); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockObsClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockObsClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + Mockito.verify(mMockObsClient).putObject(any()); + } + + @Test + public void flush() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[2 * partSize - 1]; + + mStream.write(b, 0, b.length); + Mockito.verify(mMockObsClient) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOutputStream).write(b, 0, partSize); + Mockito.verify(mMockOutputStream).write(b, partSize, partSize - 1); + 
Mockito.verify(mMockExecutor).submit(any(Callable.class)); + + mStream.flush(); + Mockito.verify(mMockExecutor, times(2)).submit(any(Callable.class)); + Mockito.verify(mMockTag, times(2)).get(); + + mStream.close(); + Mockito.verify(mMockObsClient) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void close() throws Exception { + mStream.close(); + Mockito.verify(mMockObsClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockObsClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + /** + * Mocks the OSS client and executor. + */ + private void mockOSSClientAndExecutor() throws Exception { + mMockObsClient = PowerMockito.mock(IObsClient.class); + + InitiateMultipartUploadResult initResult = + new InitiateMultipartUploadResult(BUCKET_NAME, KEY, UPLOAD_ID); + when(mMockObsClient.initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) + .thenReturn(initResult); + when(mMockObsClient.putObject(any(PutObjectRequest.class))) + .thenReturn(new PutObjectResult(BUCKET_NAME, KEY, "", "", "", new HashMap<>(), 200)); + + when(mMockObsClient.uploadPart(any(UploadPartRequest.class))) + .thenAnswer((InvocationOnMock invocation) -> { + Object[] args = invocation.getArguments(); + UploadPartResult uploadResult = new UploadPartResult(); + uploadResult.setPartNumber(((UploadPartRequest) args[0]).getPartNumber()); + return uploadResult; + }); + + when(mMockObsClient.completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) + .thenReturn(new CompleteMultipartUploadResult(BUCKET_NAME, KEY, "", "", "", "")); + + mMockTag = (ListenableFuture) PowerMockito.mock(ListenableFuture.class); + when(mMockTag.get()).thenReturn(new PartEtag("someTag", 1)); + mMockExecutor = Mockito.mock(ListeningExecutorService.class); + when(mMockExecutor.submit(any(Callable.class))).thenReturn(mMockTag); + } + + /** + * Mocks file-related classes. 
+ */ + private void mockFileAndOutputStream() throws Exception { + File file = Mockito.mock(File.class); + PowerMockito.whenNew(File.class).withAnyArguments().thenReturn(file); + + mMockOutputStream = PowerMockito.mock(BufferedOutputStream.class); + PowerMockito.whenNew(BufferedOutputStream.class) + .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mMockOutputStream); + + FileOutputStream outputStream = PowerMockito.mock(FileOutputStream.class); + PowerMockito.whenNew(FileOutputStream.class).withArguments(file).thenReturn(outputStream); + } +} diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java new file mode 100644 index 000000000000..e46087ac46d6 --- /dev/null +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java @@ -0,0 +1,172 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.underfs.oss; + +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.underfs.ObjectLowLevelOutputStream; + +import com.aliyun.oss.ClientException; +import com.aliyun.oss.OSS; +import com.aliyun.oss.OSSException; +import com.aliyun.oss.internal.Mimetypes; +import com.aliyun.oss.model.AbortMultipartUploadRequest; +import com.aliyun.oss.model.CompleteMultipartUploadRequest; +import com.aliyun.oss.model.InitiateMultipartUploadRequest; +import com.aliyun.oss.model.ObjectMetadata; +import com.aliyun.oss.model.PartETag; +import com.aliyun.oss.model.PutObjectRequest; +import com.aliyun.oss.model.UploadPartRequest; +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.ListeningExecutorService; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nullable; + +/** + * {@link ObjectLowLevelOutputStream} implement for OSS. + */ +public class OSSLowLevelOutputStream extends ObjectLowLevelOutputStream { + /** The OSS client to interact with OSS. */ + private final OSS mClient; + /** Tags for the uploaded part, provided by OSS after uploading. */ + private final List mTags = + Collections.synchronizedList(new ArrayList<>()); + + /** The upload id of this multipart upload. */ + protected volatile String mUploadId; + + /** + * Constructs a new stream for writing a file. 
+ * + * @param bucketName the name of the bucket + * @param key the key of the file + * @param oss the OSS client to upload the file with + * @param executor a thread pool executor + * @param ufsConf the object store under file system configuration + */ + public OSSLowLevelOutputStream( + String bucketName, + String key, + OSS oss, + ListeningExecutorService executor, + AlluxioConfiguration ufsConf) { + super(bucketName, key, executor, + ufsConf.getBytes(PropertyKey.UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE), ufsConf); + mClient = Preconditions.checkNotNull(oss); + } + + @Override + protected void abortMultiPartUploadInternal() throws IOException { + try { + getClient().abortMultipartUpload(new AbortMultipartUploadRequest(mBucketName, + mKey, mUploadId)); + } catch (OSSException | ClientException e) { + LOG.debug("failed to abort multi part upload. upload id: {}", mUploadId, e); + throw new IOException(String.format( + "failed to upload part. key: %s uploadId: %s", + mKey, mUploadId), e); + } + } + + @Override + protected void uploadPartInternal( + File file, + int partNumber, + boolean isLastPart, + @Nullable String md5) + throws IOException { + try { + try (InputStream inputStream = new BufferedInputStream(new FileInputStream(file))) { + final UploadPartRequest uploadRequest = + new UploadPartRequest(mBucketName, mKey, mUploadId, partNumber, inputStream, + file.length()); + if (md5 != null) { + uploadRequest.setMd5Digest(md5); + } + PartETag partETag = getClient().uploadPart(uploadRequest).getPartETag(); + mTags.add(partETag); + } + } catch (OSSException | ClientException e) { + LOG.debug("failed to upload part. part number: {} upload id: {}", partNumber, mUploadId, e); + throw new IOException(String.format( + "failed to upload part. 
key: %s part number: %s uploadId: %s", + mKey, partNumber, mUploadId), e); + } + } + + @Override + protected void initMultiPartUploadInternal() throws IOException { + try { + ObjectMetadata meta = new ObjectMetadata(); + meta.setContentType(Mimetypes.DEFAULT_MIMETYPE); + InitiateMultipartUploadRequest initRequest = + new InitiateMultipartUploadRequest(mBucketName, mKey, meta); + mUploadId = getClient().initiateMultipartUpload(initRequest).getUploadId(); + } catch (OSSException | ClientException e) { + LOG.debug("failed to init multi part upload", e); + throw new IOException("failed to init multi part upload", e); + } + } + + @Override + protected void completeMultiPartUploadInternal() throws IOException { + try { + LOG.debug("complete multi part {}", mUploadId); + CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest( + mBucketName, mKey, mUploadId, mTags); + getClient().completeMultipartUpload(completeRequest); + } catch (OSSException | ClientException e) { + LOG.debug("failed to complete multi part upload", e); + throw new IOException( + String.format("failed to complete multi part upload, key: %s, upload id: %s", + mKey, mUploadId) + e); + } + } + + @Override + protected void createEmptyObject(String key) throws IOException { + try { + ObjectMetadata objMeta = new ObjectMetadata(); + objMeta.setContentLength(0); + getClient().putObject(mBucketName, key, new ByteArrayInputStream(new byte[0]), objMeta); + } catch (OSSException | ClientException e) { + throw new IOException(e); + } + } + + @Override + protected void putObject(String key, File file, @Nullable String md5) throws IOException { + try { + ObjectMetadata objMeta = new ObjectMetadata(); + if (md5 != null) { + objMeta.setContentMD5(md5); + } + PutObjectRequest request = new PutObjectRequest(mBucketName, key, file, objMeta); + getClient().putObject(request); + } catch (OSSException | ClientException e) { + throw new IOException(e); + } + } + + protected OSS getClient() { + 
return mClient; + } +} diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java index c47fd7579c2a..a74820bb6462 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java @@ -21,17 +21,25 @@ import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.underfs.options.OpenOptions; import alluxio.util.UnderFileSystemUtils; +import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.io.PathUtils; import com.aliyun.oss.ClientBuilderConfiguration; import com.aliyun.oss.OSS; import com.aliyun.oss.OSSClientBuilder; import com.aliyun.oss.ServiceException; +import com.aliyun.oss.model.AbortMultipartUploadRequest; +import com.aliyun.oss.model.ListMultipartUploadsRequest; import com.aliyun.oss.model.ListObjectsRequest; +import com.aliyun.oss.model.MultipartUpload; +import com.aliyun.oss.model.MultipartUploadListing; import com.aliyun.oss.model.OSSObjectSummary; import com.aliyun.oss.model.ObjectListing; import com.aliyun.oss.model.ObjectMetadata; import com.google.common.base.Preconditions; +import com.google.common.base.Suppliers; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +49,8 @@ import java.io.OutputStream; import java.util.Date; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -60,6 +70,8 @@ public class OSSUnderFileSystem extends ObjectUnderFileSystem { /** Bucket name of user's configured Alluxio bucket. 
*/ private final String mBucketName; + private final Supplier mStreamingUploadExecutor; + private StsOssClientProvider mClientProvider; /** @@ -114,6 +126,34 @@ protected OSSUnderFileSystem(AlluxioURI uri, @Nullable OSS ossClient, String buc } mBucketName = bucketName; + mStreamingUploadExecutor = Suppliers.memoize(() -> { + int numTransferThreads = + conf.getInt(PropertyKey.UNDERFS_OSS_STREAMING_UPLOAD_THREADS); + ExecutorService service = ExecutorServiceFactories + .fixedThreadPool("alluxio-oss-streaming-upload-worker", + numTransferThreads).create(); + return MoreExecutors.listeningDecorator(service); + }); + } + + @Override + public void cleanup() throws IOException { + long cleanAge = mUfsConf.getMs(PropertyKey.UNDERFS_OSS_INTERMEDIATE_UPLOAD_CLEAN_AGE); + Date cleanBefore = new Date(new Date().getTime() - cleanAge); + MultipartUploadListing uploadListing = mClient.listMultipartUploads( + new ListMultipartUploadsRequest(mBucketName)); + do { + for (MultipartUpload upload : uploadListing.getMultipartUploads()) { + if (upload.getInitiated().compareTo(cleanBefore) < 0) { + mClient.abortMultipartUpload(new AbortMultipartUploadRequest( + mBucketName, upload.getKey(), upload.getUploadId())); + } + } + ListMultipartUploadsRequest request = new ListMultipartUploadsRequest(mBucketName); + request.setUploadIdMarker(uploadListing.getNextUploadIdMarker()); + request.setKeyMarker(uploadListing.getKeyMarker()); + uploadListing = mClient.listMultipartUploads(request); + } while (uploadListing.isTruncated()); } @Override @@ -156,6 +196,10 @@ public boolean createEmptyObject(String key) { @Override protected OutputStream createObject(String key) throws IOException { + if (mUfsConf.getBoolean(PropertyKey.UNDERFS_OSS_STREAMING_UPLOAD_ENABLED)) { + return new OSSLowLevelOutputStream(mBucketName, key, mClient, + mStreamingUploadExecutor.get(), mUfsConf); + } return new OSSOutputStream(mBucketName, key, mClient, mUfsConf.getList(PropertyKey.TMP_DIRS)); } diff --git 
a/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java new file mode 100644 index 000000000000..469f52b86401 --- /dev/null +++ b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java @@ -0,0 +1,219 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.oss; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.util.FormatUtils; + +import com.aliyun.oss.OSS; +import com.aliyun.oss.model.CompleteMultipartUploadRequest; +import com.aliyun.oss.model.CompleteMultipartUploadResult; +import com.aliyun.oss.model.InitiateMultipartUploadRequest; +import com.aliyun.oss.model.InitiateMultipartUploadResult; +import com.aliyun.oss.model.ObjectMetadata; +import com.aliyun.oss.model.PartETag; +import com.aliyun.oss.model.PutObjectRequest; +import com.aliyun.oss.model.UploadPartRequest; +import com.aliyun.oss.model.UploadPartResult; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; 
+import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.powermock.api.mockito.PowerMockito; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.security.DigestOutputStream; +import java.util.concurrent.Callable; + +/** + * Unit tests for the {@link OSSLowLevelOutputStream}. + */ +@RunWith(PowerMockRunner.class) +@PrepareForTest(OSSLowLevelOutputStream.class) +@SuppressWarnings("unchecked") +public class OSSLowLevelOutputStreamTest { + private static final String BUCKET_NAME = "testBucket"; + private static final String PARTITION_SIZE = "8MB"; + private static final String KEY = "testKey"; + private static final String UPLOAD_ID = "testUploadId"; + private static InstancedConfiguration sConf = Configuration.modifiableGlobal(); + + private OSS mMockOssClient; + private ListeningExecutorService mMockExecutor; + private BufferedOutputStream mMockOutputStream; + private ListenableFuture mMockTag; + + private OSSLowLevelOutputStream mStream; + + /** + * Sets the properties and configuration before each test runs. 
+ */ + @Before + public void before() throws Exception { + mockOSSClientAndExecutor(); + mockFileAndOutputStream(); + sConf.set(PropertyKey.UNDERFS_OSS_STREAMING_UPLOAD_PARTITION_SIZE, PARTITION_SIZE); + mStream = new OSSLowLevelOutputStream(BUCKET_NAME, KEY, mMockOssClient, mMockExecutor, sConf); + } + + @Test + public void writeByte() throws Exception { + mStream.write(1); + + mStream.close(); + Mockito.verify(mMockOutputStream).write(new byte[] {1}, 0, 1); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockOssClient).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockOssClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOssClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void writeByteArrayForSmallFile() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[partSize]; + + mStream.write(b, 0, b.length); + Mockito.verify(mMockOutputStream).write(b, 0, b.length); + + mStream.close(); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockOssClient).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockOssClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOssClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void writeByteArrayForLargeFile() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[partSize + 1]; + Assert.assertEquals(mStream.getPartNumber(), 1); + mStream.write(b, 0, b.length); + Assert.assertEquals(mStream.getPartNumber(), 2); + Mockito.verify(mMockOssClient) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOutputStream).write(b, 0, b.length - 1); + 
Mockito.verify(mMockOutputStream).write(b, b.length - 1, 1); + Mockito.verify(mMockExecutor).submit(any(Callable.class)); + + mStream.close(); + Assert.assertEquals(mStream.getPartNumber(), 3); + Mockito.verify(mMockOssClient) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void createEmptyFile() throws Exception { + mStream.close(); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockOssClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOssClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + Mockito.verify(mMockOssClient).putObject(eq(BUCKET_NAME), eq(KEY), any(InputStream.class), + any(ObjectMetadata.class)); + } + + @Test + public void flush() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[2 * partSize - 1]; + + mStream.write(b, 0, b.length); + Mockito.verify(mMockOssClient) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOutputStream).write(b, 0, partSize); + Mockito.verify(mMockOutputStream).write(b, partSize, partSize - 1); + Mockito.verify(mMockExecutor).submit(any(Callable.class)); + + mStream.flush(); + Mockito.verify(mMockExecutor, times(2)).submit(any(Callable.class)); + Mockito.verify(mMockTag, times(2)).get(); + + mStream.close(); + Mockito.verify(mMockOssClient) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void close() throws Exception { + mStream.close(); + Mockito.verify(mMockOssClient, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockOssClient, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + /** + * Mocks the OSS client and executor. 
+ */ + private void mockOSSClientAndExecutor() throws Exception { + mMockOssClient = PowerMockito.mock(OSS.class); + + InitiateMultipartUploadResult initResult = new InitiateMultipartUploadResult(); + when(mMockOssClient.initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) + .thenReturn(initResult); + + initResult.setUploadId(UPLOAD_ID); + when(mMockOssClient.uploadPart(any(UploadPartRequest.class))) + .thenAnswer((InvocationOnMock invocation) -> { + Object[] args = invocation.getArguments(); + UploadPartResult uploadResult = new UploadPartResult(); + uploadResult.setPartNumber(((UploadPartRequest) args[0]).getPartNumber()); + return uploadResult; + }); + + when(mMockOssClient.completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) + .thenReturn(new CompleteMultipartUploadResult()); + + mMockTag = (ListenableFuture) PowerMockito.mock(ListenableFuture.class); + when(mMockTag.get()).thenReturn(new PartETag(1, "someTag")); + mMockExecutor = Mockito.mock(ListeningExecutorService.class); + when(mMockExecutor.submit(any(Callable.class))).thenReturn(mMockTag); + } + + /** + * Mocks file-related classes. 
+ */ + private void mockFileAndOutputStream() throws Exception { + File file = Mockito.mock(File.class); + PowerMockito.whenNew(File.class).withAnyArguments().thenReturn(file); + + mMockOutputStream = PowerMockito.mock(BufferedOutputStream.class); + PowerMockito.whenNew(BufferedOutputStream.class) + .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mMockOutputStream); + + FileOutputStream outputStream = PowerMockito.mock(FileOutputStream.class); + PowerMockito.whenNew(FileOutputStream.class).withArguments(file).thenReturn(outputStream); + } +} diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java index 26b8828ba745..eefc4b87cb9a 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java @@ -11,14 +11,11 @@ package alluxio.underfs.s3a; -import alluxio.Constants; +import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; -import alluxio.retry.CountingRetry; -import alluxio.retry.RetryPolicy; -import alluxio.util.CommonUtils; -import alluxio.util.io.PathUtils; +import alluxio.underfs.ObjectLowLevelOutputStream; -import com.amazonaws.AmazonClientException; +import com.amazonaws.SdkClientException; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.internal.Mimetypes; import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; @@ -26,125 +23,38 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PartETag; +import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.util.Base64; import com.google.common.base.Preconditions; -import com.google.common.util.concurrent.Futures; -import 
com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; -import java.security.DigestOutputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** - * [Experimental] A stream for writing a file into S3 using streaming upload. - * The data transfer is done using S3 low-level multipart upload. - * - * The multipart upload is initialized in the first write() and an upload id is given - * by AWS S3 to distinguish different multipart uploads. - * - * We upload data in partitions. When write(), the data will be persisted to - * a temporary file {@link #mFile} on the local disk. When the data {@link #mPartitionOffset} - * in this temporary file reaches the {@link #mPartitionSize}, the file will be submitted - * to the upload executor {@link #mExecutor} and we do not wait for uploads to finish. - * A new temp file will be created for the future write and the {@link #mPartitionOffset} - * will be reset to zero. The process goes until all the data has been written to temp files. - * - * In flush(), we upload the buffered data if they are bigger than 5MB - * and wait for all uploads to finish. The temp files will be deleted after uploading successfully. - * - * In close(), we upload the last part of data (if exists), wait for all uploads to finish, - * and complete the multipart upload. 
- * - * close() will not be retried, but all the multipart upload - * related operations(init, upload, complete, and abort) will be retried. - * - * If an error occurs and we have no way to recover, we abort the multipart uploads. - * Some multipart uploads may not be completed/aborted in normal ways and need periodical cleanup - * by enabling the {@link PropertyKey#UNDERFS_CLEANUP_ENABLED}. - * When a leader master starts or a cleanup interval is reached, all the multipart uploads - * older than {@link PropertyKey#UNDERFS_S3_INTERMEDIATE_UPLOAD_CLEAN_AGE} will be cleaned. + * Object storage low output stream for aws s3. */ @NotThreadSafe -public class S3ALowLevelOutputStream extends OutputStream { +public class S3ALowLevelOutputStream extends ObjectLowLevelOutputStream { private static final Logger LOG = LoggerFactory.getLogger(S3ALowLevelOutputStream.class); + /** Server side encrypt enabled. */ private final boolean mSseEnabled; - - private final List mTmpDirs; - - /** - * Only parts bigger than 5MB could be uploaded through S3A low-level multipart upload, - * except the last part. - */ - private static final long UPLOAD_THRESHOLD = 5L * Constants.MB; - - /** Bucket name of the Alluxio S3 bucket. */ - private final String mBucketName; - /** The Amazon S3 client to interact with S3. */ - protected AmazonS3 mClient; - - /** Executing the upload tasks. */ - private final ListeningExecutorService mExecutor; - - /** Key of the file when it is uploaded to S3. */ - protected final String mKey; - - /** The retry policy of this multipart upload. */ - private final RetryPolicy mRetryPolicy = new CountingRetry(5); - - /** Pre-allocated byte buffer for writing single characters. */ - private final byte[] mSingleCharWrite = new byte[1]; - + private final AmazonS3 mClient; /** Tags for the uploaded part, provided by S3 after uploading. */ - private final List mTags = new ArrayList<>(); - - /** The MD5 hash of the file. 
*/ - private MessageDigest mHash; + private final List mTags = Collections.synchronizedList(new ArrayList<>()); /** The upload id of this multipart upload. */ - private String mUploadId; - - /** Flag to indicate this stream has been closed, to ensure close is only done once. */ - private boolean mClosed = false; - - /** When the offset reaches the partition size, we upload the temp file. */ - private long mPartitionOffset; - /** The maximum allowed size of a partition. */ - private final long mPartitionSize; - - /** - * The local temp file that will be uploaded when reaches the partition size - * or when flush() is called and this file is bigger than 5MB. - */ - private File mFile; - /** The output stream to the local temp file. */ - private OutputStream mLocalOutputStream; - - /** - * Give each upload request an unique and continuous id - * so that S3 knows the part sequence to concatenate the parts to a single object. - */ - private AtomicInteger mPartNumber; - - /** Store the future of tags. */ - private List> mTagFutures = new ArrayList<>(); + protected volatile String mUploadId; /** * Constructs a new stream for writing a file. 
@@ -153,336 +63,126 @@ public class S3ALowLevelOutputStream extends OutputStream { * @param key the key of the file * @param s3Client the Amazon S3 client to upload the file with * @param executor a thread pool executor - * @param streamingUploadPartitionSize the size in bytes for partitions of streaming uploads - * @param tmpDirs a list of temporary directories - * @param sseEnabled whether or not server side encryption is enabled + * @param ufsConf the object store under file system configuration */ - public S3ALowLevelOutputStream(String bucketName, String key, AmazonS3 s3Client, - ListeningExecutorService executor, long streamingUploadPartitionSize, List tmpDirs, - boolean sseEnabled) { - Preconditions.checkArgument(bucketName != null && !bucketName.isEmpty(), "Bucket name must " - + "not be null or empty."); - mBucketName = bucketName; - mClient = s3Client; - mExecutor = executor; - mTmpDirs = tmpDirs; - mSseEnabled = sseEnabled; - try { - mHash = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - LOG.warn("Algorithm not available for MD5 hash.", e); - mHash = null; - } - mKey = key; - // Partition size should be at least 5 MB, since S3 low-level multipart upload does not - // accept intermediate part smaller than 5 MB. 
- mPartitionSize = Math.max(UPLOAD_THRESHOLD, streamingUploadPartitionSize); - mPartNumber = new AtomicInteger(1); + public S3ALowLevelOutputStream( + String bucketName, + String key, + AmazonS3 s3Client, + ListeningExecutorService executor, + AlluxioConfiguration ufsConf) { + super(bucketName, key, executor, + ufsConf.getBytes(PropertyKey.UNDERFS_S3_STREAMING_UPLOAD_PARTITION_SIZE), ufsConf); + mClient = Preconditions.checkNotNull(s3Client); + mSseEnabled = ufsConf.getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED); } @Override - public void write(int b) throws IOException { - mSingleCharWrite[0] = (byte) b; - write(mSingleCharWrite); + protected void uploadPartInternal( + File file, + int partNumber, + boolean isLastPart, + @Nullable String md5) + throws IOException { + try { + final UploadPartRequest uploadRequest = new UploadPartRequest() + .withBucketName(mBucketName) + .withKey(mKey) + .withUploadId(mUploadId) + .withPartNumber(partNumber) + .withFile(file) + .withPartSize(file.length()); + if (md5 != null) { + uploadRequest.setMd5Digest(md5); + } + uploadRequest.setLastPart(isLastPart); + PartETag partETag = getClient().uploadPart(uploadRequest).getPartETag(); + mTags.add(partETag); + } catch (SdkClientException e) { + LOG.debug("failed to upload part.", e); + throw new IOException(String.format( + "failed to upload part. 
key: %s part number: %s uploadId: %s", + mKey, partNumber, mUploadId), e); + } } @Override - public void write(byte[] b) throws IOException { - write(b, 0, b.length); + protected void initMultiPartUploadInternal() throws IOException { + try { + ObjectMetadata meta = new ObjectMetadata(); + if (mSseEnabled) { + meta.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); + } + meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); + mUploadId = getClient() + .initiateMultipartUpload(new InitiateMultipartUploadRequest(mBucketName, mKey, meta)) + .getUploadId(); + } catch (SdkClientException e) { + LOG.debug("failed to init multi part upload", e); + throw new IOException("failed to init multi part upload", e); + } } @Override - public void write(byte[] b, int off, int len) throws IOException { - if (b == null || len == 0) { - return; - } - validateWriteArgs(b, off, len); - if (mUploadId == null) { - initMultiPartUpload(); - } - if (mFile == null) { - initNewFile(); - } - if (mPartitionOffset + len < mPartitionSize) { - mLocalOutputStream.write(b, off, len); - mPartitionOffset += len; - } else { - int firstLen = (int) (mPartitionSize - mPartitionOffset); - mLocalOutputStream.write(b, off, firstLen); - mPartitionOffset += firstLen; - uploadPart(); - write(b, off + firstLen, len - firstLen); + protected void completeMultiPartUploadInternal() throws IOException { + try { + LOG.debug("complete multi part {}", mUploadId); + getClient().completeMultipartUpload(new CompleteMultipartUploadRequest( + mBucketName, mKey, mUploadId, mTags)); + } catch (SdkClientException e) { + LOG.debug("failed to complete multi part upload", e); + throw new IOException( + String.format("failed to complete multi part upload, key: %s, upload id: %s", + mKey, mUploadId), e); } } @Override - public void flush() throws IOException { - if (mUploadId == null) { - return; - } - // We try to minimize the time use to close() - // because Fuse release() method which calls close() is async. 
- // In flush(), we upload the current writing file if it is bigger than 5 MB, - // and wait for all current upload to complete. - if (mLocalOutputStream != null) { - mLocalOutputStream.flush(); - } - if (mPartitionOffset > UPLOAD_THRESHOLD) { - uploadPart(); + protected void abortMultiPartUploadInternal() throws IOException { + try { + getClient().abortMultipartUpload( + new AbortMultipartUploadRequest(mBucketName, mKey, mUploadId)); + } catch (SdkClientException e) { + LOG.debug("failed to abort multi part upload", e); + throw new IOException( + String.format("failed to abort multi part upload, key: %s, upload id: %s", mKey, + mUploadId), e); } - waitForAllPartsUpload(); } @Override - public void close() throws IOException { - if (mClosed) { - return; - } - - // Set the closed flag, we never retry close() even if exception occurs - mClosed = true; - - // Multi-part upload has not been initialized - if (mUploadId == null) { - LOG.debug("S3A Streaming upload output stream closed without uploading any data."); - return; - } - + protected void createEmptyObject(String key) throws IOException { try { - if (mFile != null) { - mLocalOutputStream.close(); - int partNumber = mPartNumber.getAndIncrement(); - final UploadPartRequest uploadRequest = new UploadPartRequest() - .withBucketName(mBucketName) - .withKey(mKey) - .withUploadId(mUploadId) - .withPartNumber(partNumber) - .withFile(mFile) - .withPartSize(mFile.length()); - uploadRequest.setLastPart(true); - execUpload(uploadRequest); - } - - waitForAllPartsUpload(); - completeMultiPartUpload(); - } catch (Exception e) { - LOG.error("Failed to upload {}", mKey, e); + ObjectMetadata meta = new ObjectMetadata(); + meta.setContentLength(0); + meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); + getClient().putObject( + new PutObjectRequest(mBucketName, key, new ByteArrayInputStream(new byte[0]), meta)); + } catch (SdkClientException e) { throw new IOException(e); } } - protected void initMultiPartUpload() throws 
IOException { - initMultiPartUpload(getClient()); - } - - /** - * Initializes multipart upload. - */ - private void initMultiPartUpload(AmazonS3 s3Client) throws IOException { - // Generate the object metadata by setting server side encryption, md5 checksum, - // and encoding as octet stream since no assumptions are made about the file type - ObjectMetadata meta = new ObjectMetadata(); - if (mSseEnabled) { - meta.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); - } - if (mHash != null) { - meta.setContentMD5(Base64.encodeAsString(mHash.digest())); - } - meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); - - AmazonClientException lastException; - InitiateMultipartUploadRequest initRequest = - new InitiateMultipartUploadRequest(mBucketName, mKey).withObjectMetadata(meta); - do { - try { - mUploadId = s3Client.initiateMultipartUpload(initRequest).getUploadId(); - return; - } catch (AmazonClientException e) { - lastException = e; - } - } while (mRetryPolicy.attempt()); - // This point is only reached if the operation failed more - // than the allowed retry count - throw new IOException("Unable to init multipart upload to " + mKey, lastException); - } - - /** - * Creates a new temp file to write to. - */ - private void initNewFile() throws IOException { - mFile = new File(PathUtils.concatPath(CommonUtils.getTmpDir(mTmpDirs), UUID.randomUUID())); - if (mHash != null) { - mLocalOutputStream = - new BufferedOutputStream(new DigestOutputStream(new FileOutputStream(mFile), mHash)); - } else { - mLocalOutputStream = new BufferedOutputStream(new FileOutputStream(mFile)); - } - mPartitionOffset = 0; - LOG.debug("Init new temp file @ {}", mFile.getPath()); - } - - /** - * Uploads part async. 
- */ - protected void uploadPart() throws IOException { - if (mFile == null) { - return; - } - mLocalOutputStream.close(); - int partNumber = mPartNumber.getAndIncrement(); - File newFileToUpload = new File(mFile.getPath()); - mFile = null; - mLocalOutputStream = null; - UploadPartRequest uploadRequest = new UploadPartRequest() - .withBucketName(mBucketName) - .withKey(mKey) - .withUploadId(mUploadId) - .withPartNumber(partNumber) - .withFile(newFileToUpload) - .withPartSize(newFileToUpload.length()); - execUpload(uploadRequest); - } - - protected void execUpload(UploadPartRequest request) throws IOException { - execUpload(getClient(), request); - } - - /** - * Executes the upload part request. - * - * @param request the upload part request - */ - protected void execUpload(AmazonS3 s3Client, UploadPartRequest request) { - File file = request.getFile(); - ListenableFuture futureTag = - mExecutor.submit((Callable) () -> { - PartETag partETag; - AmazonClientException lastException; - try { - do { - try { - partETag = s3Client.uploadPart(request).getPartETag(); - return partETag; - } catch (AmazonClientException e) { - lastException = e; - } - } while (mRetryPolicy.attempt()); - } finally { - // Delete the uploaded or failed to upload file - if (!file.delete()) { - LOG.error("Failed to delete temporary file @ {}", file.getPath()); - } - } - throw new IOException("Fail to upload part " + request.getPartNumber() - + " to " + request.getKey(), lastException); - }); - mTagFutures.add(futureTag); - LOG.debug("Submit upload part request. key={}, partNum={}, file={}, fileSize={}, lastPart={}.", - mKey, request.getPartNumber(), file.getPath(), file.length(), request.isLastPart()); - } - - /** - * Waits for the submitted upload tasks to finish. 
- */ - protected void waitForAllPartsUpload() throws IOException { - int beforeSize = mTags.size(); + @Override + protected void putObject(String key, File file, @Nullable String md5) throws IOException { try { - for (ListenableFuture future : mTagFutures) { - mTags.add(future.get()); + ObjectMetadata meta = new ObjectMetadata(); + if (mSseEnabled) { + meta.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } - } catch (ExecutionException e) { - // No recover ways so that we need to cancel all the upload tasks - // and abort the multipart upload - Futures.allAsList(mTagFutures).cancel(true); - abortMultiPartUpload(); - throw new IOException("Part upload failed in multipart upload with " - + "id '" + mUploadId + "' to " + mKey, e); - } catch (InterruptedException e) { - LOG.warn("Interrupted object upload.", e); - Futures.allAsList(mTagFutures).cancel(true); - abortMultiPartUpload(); - Thread.currentThread().interrupt(); - } - mTagFutures = new ArrayList<>(); - if (mTags.size() != beforeSize) { - LOG.debug("Uploaded {} partitions of id '{}' to {}.", mTags.size(), mUploadId, mKey); - } - } - - protected void completeMultiPartUpload() throws IOException { - completeMultiPartUpload(getClient(), mUploadId); - } - - /** - * Completes multipart upload. 
- */ - protected void completeMultiPartUpload(AmazonS3 s3Client, String uploadId) throws IOException { - AmazonClientException lastException; - CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest(mBucketName, - mKey, uploadId, mTags); - do { - try { - s3Client.completeMultipartUpload(completeRequest); - LOG.debug("Completed multipart upload for key {} and id '{}' with {} partitions.", - mKey, uploadId, mTags.size()); - return; - } catch (AmazonClientException e) { - lastException = e; - } - } while (mRetryPolicy.attempt()); - // This point is only reached if the operation failed more - // than the allowed retry count - throw new IOException("Unable to complete multipart upload with id '" - + uploadId + "' to " + mKey, lastException); - } - - protected void abortMultiPartUpload() { - abortMultiPartUpload(getClient(), mUploadId); - } - - /** - * Aborts multipart upload. - */ - protected void abortMultiPartUpload(AmazonS3 s3Client, String uploadId) { - AmazonClientException lastException; - do { - try { - s3Client.abortMultipartUpload(new AbortMultipartUploadRequest(mBucketName, - mKey, uploadId)); - LOG.warn("Aborted multipart upload for key {} and id '{}' to bucket {}", - mKey, uploadId, mBucketName); - return; - } catch (AmazonClientException e) { - lastException = e; + if (md5 != null) { + meta.setContentMD5(md5); } - } while (mRetryPolicy.attempt()); - // This point is only reached if the operation failed more - // than the allowed retry count - LOG.warn("Unable to abort multipart upload for key '{}' and id '{}' to bucket {}. " - + "You may need to enable the periodical cleanup by setting property {}" - + "to be true.", mKey, uploadId, mBucketName, - PropertyKey.UNDERFS_CLEANUP_ENABLED.getName(), - lastException); - } - - /** - * Validates the arguments of write operation. 
- * - * @param b the data - * @param off the start offset in the data - * @param len the number of bytes to write - */ - private void validateWriteArgs(byte[] b, int off, int len) { - Preconditions.checkNotNull(b); - if (off < 0 || off > b.length || len < 0 - || (off + len) > b.length || (off + len) < 0) { - throw new IndexOutOfBoundsException("write(b[" + b.length + "], " + off + ", " + len + ")"); + meta.setContentLength(file.length()); + meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); + PutObjectRequest putReq = new PutObjectRequest(mBucketName, key, file); + putReq.setMetadata(meta); + getClient().putObject(putReq); + } catch (Exception e) { + throw new IOException(e); } } - /** - * @return the client - */ protected AmazonS3 getClient() { return mClient; } diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java index 5edc8dc33208..440c9762180b 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java @@ -406,15 +406,11 @@ public boolean createEmptyObject(String key) { @Override protected OutputStream createObject(String key) throws IOException { if (mStreamingUploadEnabled) { - return new S3ALowLevelOutputStream(mBucketName, key, mClient, mExecutor, - mUfsConf.getBytes(PropertyKey.UNDERFS_S3_STREAMING_UPLOAD_PARTITION_SIZE), - mUfsConf.getList(PropertyKey.TMP_DIRS), - mUfsConf.getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED)); + return new S3ALowLevelOutputStream(mBucketName, key, mClient, mExecutor, mUfsConf); } return new S3AOutputStream(mBucketName, key, mManager, mUfsConf.getList(PropertyKey.TMP_DIRS), - mUfsConf - .getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED)); + mUfsConf.getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED)); } @Override diff --git 
a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java index 1cd399069ff8..8412f6e6dda5 100644 --- a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java @@ -27,10 +27,12 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; import com.amazonaws.services.s3.model.PartETag; +import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.model.UploadPartRequest; import com.amazonaws.services.s3.model.UploadPartResult; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -73,33 +75,49 @@ public class S3ALowLevelOutputStreamTest { public void before() throws Exception { mockS3ClientAndExecutor(); mockFileAndOutputStream(); + sConf.set(PropertyKey.UNDERFS_S3_STREAMING_UPLOAD_PARTITION_SIZE, PARTITION_SIZE); - mStream = new S3ALowLevelOutputStream(BUCKET_NAME, KEY, mMockS3Client, mMockExecutor, - sConf.getBytes(PropertyKey.UNDERFS_S3_STREAMING_UPLOAD_PARTITION_SIZE), - sConf.getList(PropertyKey.TMP_DIRS), - sConf.getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED)); + mStream = new S3ALowLevelOutputStream(BUCKET_NAME, KEY, mMockS3Client, mMockExecutor, sConf); } @Test public void writeByte() throws Exception { mStream.write(1); - Mockito.verify(mMockS3Client) - .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); - Mockito.verify(mMockOutputStream).write(new byte[]{1}, 0, 1); + + mStream.close(); + Mockito.verify(mMockOutputStream).write(new byte[] {1}, 0, 1); Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + 
Mockito.verify(mMockS3Client).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockS3Client, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockS3Client, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + } + + @Test + public void writeByteArrayForSmallFile() throws Exception { + int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); + byte[] b = new byte[partSize]; + + mStream.write(b, 0, b.length); + Mockito.verify(mMockOutputStream).write(b, 0, b.length); mStream.close(); - Mockito.verify(mMockExecutor).submit(any(Callable.class)); - Mockito.verify(mMockS3Client) + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); + Mockito.verify(mMockS3Client).putObject(any(PutObjectRequest.class)); + Mockito.verify(mMockS3Client, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockS3Client, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); } @Test - public void writeByteArray() throws Exception { + public void writeByteArrayForLargeFile() throws Exception { int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); byte[] b = new byte[partSize + 1]; - + Assert.assertEquals(mStream.getPartNumber(), 1); mStream.write(b, 0, b.length); + Assert.assertEquals(mStream.getPartNumber(), 2); Mockito.verify(mMockS3Client) .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOutputStream).write(b, 0, b.length - 1); @@ -107,10 +125,22 @@ public void writeByteArray() throws Exception { Mockito.verify(mMockExecutor).submit(any(Callable.class)); mStream.close(); + Assert.assertEquals(mStream.getPartNumber(), 3); Mockito.verify(mMockS3Client) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); } + @Test + public void createEmptyFile() throws Exception { + mStream.close(); + Mockito.verify(mMockExecutor, never()).submit(any(Callable.class)); 
+ Mockito.verify(mMockS3Client, never()) + .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); + Mockito.verify(mMockS3Client, never()) + .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + Mockito.verify(mMockS3Client).putObject(any(PutObjectRequest.class)); + } + @Test public void flush() throws Exception { int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); From 58f1f9639e385e44d524f795621e94dab1b3d1a0 Mon Sep 17 00:00:00 2001 From: jja725 Date: Fri, 27 Jan 2023 15:14:03 -0800 Subject: [PATCH 089/334] Fix s3 client ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16824 change-id: cid-975e463d895496294e2172d43d6b287fb37565c5 --- .../main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java index eefc4b87cb9a..b5e48762476e 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java @@ -49,7 +49,7 @@ public class S3ALowLevelOutputStream extends ObjectLowLevelOutputStream { /** Server side encrypt enabled. */ private final boolean mSseEnabled; /** The Amazon S3 client to interact with S3. 
*/ - private final AmazonS3 mClient; + protected final AmazonS3 mClient; /** Tags for the uploaded part, provided by S3 after uploading. */ private final List mTags = Collections.synchronizedList(new ArrayList<>()); From 6a238d1b4ca829989e6c3f9736eb7596c3b8618b Mon Sep 17 00:00:00 2001 From: jja725 Date: Fri, 27 Jan 2023 15:44:18 -0800 Subject: [PATCH 090/334] Make s3 client not final ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16825 change-id: cid-71f088a46766a572f64ef76b1ec01a207645a425 --- .../main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java index b5e48762476e..465e5e3738d4 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java @@ -49,7 +49,7 @@ public class S3ALowLevelOutputStream extends ObjectLowLevelOutputStream { /** Server side encrypt enabled. */ private final boolean mSseEnabled; /** The Amazon S3 client to interact with S3. */ - protected final AmazonS3 mClient; + protected AmazonS3 mClient; /** Tags for the uploaded part, provided by S3 after uploading. 
*/ private final List mTags = Collections.synchronizedList(new ArrayList<>()); From fb07ea49e7a2e567f562a04c282d93741b7094f1 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Tue, 31 Jan 2023 03:49:54 +0800 Subject: [PATCH 091/334] Fix debug log in FileSystemContext#reinit() ### What changes are proposed in this pull request? Fix debug log in FileSystemContext#reinit(). ### Why are the changes needed? The argument is wrong. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16826 change-id: cid-5453a328dff317614d5e529a6a82c413241194fa --- .../fs/src/main/java/alluxio/client/file/FileSystemContext.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java index d7dd61b218c9..8e0cbb95c238 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java @@ -431,7 +431,7 @@ public void reinit(boolean updateClusterConf, boolean updatePathConf) + "meta master (%s) during reinitialization", masterAddr), e); } LOG.debug("Reinitializing FileSystemContext: update cluster conf: {}, update path conf:" - + " {}", updateClusterConf, updateClusterConf); + + " {}", updateClusterConf, updatePathConf); closeContext(); ReconfigurableRegistry.update(); initContext(getClientContext(), mMasterAddresses != null From 98dba4d78b201ddaf3d5db428befaf83be5cb618 Mon Sep 17 00:00:00 2001 From: JySongWithZhangCe <1960014650@qq.com> Date: Tue, 31 Jan 2023 03:50:54 +0800 Subject: [PATCH 092/334] Add buildx support for build-docker.sh ### What changes are proposed in this pull request? Fix https://github.com/Alluxio/alluxio/issues/16821 Developers can execute cross compare with `build-docker.sh` etc. `./build-docker.sh buildx linux/arm64` ### Why are the changes needed? Nope. 
### Does this PR introduce any user facing changes? Nope, just for developers. pr-link: Alluxio/alluxio#16822 change-id: cid-736afb945b565d6756fa671e257a67d1db5e408a --- dev/scripts/build-docker.sh | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/dev/scripts/build-docker.sh b/dev/scripts/build-docker.sh index 1c3482516b9d..870c9c0e93cb 100755 --- a/dev/scripts/build-docker.sh +++ b/dev/scripts/build-docker.sh @@ -23,6 +23,8 @@ readonly GENERATE_TARBALLS_SCRIPT="${SCRIPT_DIR}/generate-tarballs" # Builds a docker image from the specified tarball. function build_docker_image { local tarball=$1 + local build_mode=$2 + local platform=$3 local tmp_dir="$(mktemp -d)" cp -r "${DOCKER_DIR}" "${tmp_dir}" cp "${tarball}" "${tmp_dir}/docker" @@ -30,18 +32,33 @@ function build_docker_image { # example tarball: /path/to/workdir/alluxio-1.4.0-SNAPSHOT.tar.gz # docker image tags must be lowercase local tarball_basename=$(basename ${tarball}) - local tag=$(echo ${tarball_basename%.tar.gz} | tr '[:upper:]' '[:lower:]') - echo "Building ${tag} image..." - docker build -t "${tag}" --build-arg "ALLUXIO_TARBALL=${tarball_basename}" . + + if [[ "${build_mode}" == "buildx" ]] + then + local tag=$(echo ${tarball_basename%.tar.gz}${platform} | tr '[:upper:]' '[:lower:]') + echo "Building ${tag} image..." + docker buildx build -t "${tag}" --platform="${platform}" --build-arg "ALLUXIO_TARBALL=${tarball_basename}" . + else + local tag=$(echo ${tarball_basename%.tar.gz} | tr '[:upper:]' '[:lower:]') + echo "Building ${tag} image..." + docker build -t "${tag}" --build-arg "ALLUXIO_TARBALL=${tarball_basename}" . 
+ fi rm -rf "${tmp_dir}" } function main { + local build_mode="default" + local platform="linux/amd64" + if [[ $1 == "buildx" ]] + then + build_mode="buildx" + platform=$2 + fi cd "${SCRIPT_DIR}" local tmp_dir="$(mktemp -d)" "${GENERATE_TARBALLS_SCRIPT}" single -target "${tmp_dir}/alluxio-\${VERSION}.tar.gz" local tarball="${tmp_dir}/$(ls -tr ${tmp_dir} | tail -1)" - build_docker_image "${tarball}" + build_docker_image "${tarball}" "${build_mode}" "${platform}" rm -rf ${tmp_dir} } From c1064d546783cfa67b564d374305dfe2bcf7ebe4 Mon Sep 17 00:00:00 2001 From: Huang Hua Date: Tue, 31 Jan 2023 13:28:01 +0800 Subject: [PATCH 093/334] Add examples for Alluxio Native API, Hadoop Compatible FS and HDFS Signed-off-by: Huang Hua ### What changes are proposed in this pull request? Added examples for Alluxio Native API, Hadoop Compatible File System and HDFS ### Why are the changes needed? These examples show how to use the Alluxio native API, Hadoop compatible File System and HDFS programming in Java. Developers can use them as a quick start. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#16647 change-id: cid-fea930ad1488b244ba824f6f2c0c162552794b80 --- docs/en/api/Java-API.md | 7 +++ examples/pom.xml | 23 ++++--- .../main/java/alluxio/examples/AlluxioFS.java | 56 +++++++++++++++++ .../java/alluxio/examples/AlluxioHDFS.java | 61 +++++++++++++++++++ .../src/main/java/alluxio/examples/HDFS.java | 60 ++++++++++++++++++ 5 files changed, 198 insertions(+), 9 deletions(-) create mode 100644 examples/src/main/java/alluxio/examples/AlluxioFS.java create mode 100644 examples/src/main/java/alluxio/examples/AlluxioHDFS.java create mode 100644 examples/src/main/java/alluxio/examples/HDFS.java diff --git a/docs/en/api/Java-API.md b/docs/en/api/Java-API.md index c85eda2b1edf..74d70523be19 100644 --- a/docs/en/api/Java-API.md +++ b/docs/en/api/Java-API.md @@ -333,3 +333,10 @@ org.apache.orc.OrcFile.ReaderOptions options = new org.apache.orc.OrcFile.Reader org.apache.orc.Reader orc = org.apache.orc.OrcFile.createReader( new Path("alluxio://localhost:19998/path/file.orc"), options); ``` + +### Examples in Source Code + +There are several example Java programs. 
They are: +* [`Alluxio native API`](/examples/src/main/java/alluxio/examples/AlluxioFS.java) +* [`Alluxio Hadoop-Compatible File System`](/examples/src/main/java/alluxio/examples/AlluxioHDFS.java) +* [`HDFS`](/examples/src/main/java/alluxio/examples/HDFS.java) diff --git a/examples/pom.xml b/examples/pom.xml index c9710c003341..ab884853b042 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -27,6 +27,7 @@ ${project.parent.basedir}/build false + 3.2.1 @@ -50,22 +51,26 @@ org.apache.hadoop hadoop-client + ${hadoop.version} - org.slf4j - slf4j-api + org.apache.hadoop + hadoop-common + ${hadoop.version} - - - org.alluxio - alluxio-core-client-fs - ${project.version} + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + org.slf4j + slf4j-api org.alluxio - alluxio-core-common - ${project.version} + alluxio-shaded-client + 2.9.0 diff --git a/examples/src/main/java/alluxio/examples/AlluxioFS.java b/examples/src/main/java/alluxio/examples/AlluxioFS.java new file mode 100644 index 000000000000..d1e1d3201168 --- /dev/null +++ b/examples/src/main/java/alluxio/examples/AlluxioFS.java @@ -0,0 +1,56 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.examples; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; + +/** + * Example program to use Alluxio native API to read and write files. + * + */ +public class AlluxioFS +{ + /** + * Entry point for the {@link AlluxioFS} program. 
+ * + * @param args command-line arguments + */ + public static void main(String[] args) throws + alluxio.exception.FileAlreadyExistsException, + alluxio.exception.InvalidPathException, + alluxio.exception.AlluxioException, + java.io.IOException + { + final int bufSize = alluxio.Constants.KB * 4; + System.out.println("Start Alluxio Native FS write/read"); + FileSystem fs = FileSystem.Factory.get(); + AlluxioURI path = new AlluxioURI("/AlluxioFS.txt"); + + FileOutStream out = fs.createFile(path); + byte[] buffout = new byte[bufSize]; + for (int i = 0; i < bufSize; i++) { + buffout[i] = (byte) ('A' + i % 26); + } + out.write(buffout); + out.close(); + System.out.println("End write"); + + FileInStream in = fs.openFile(path); + byte[] buffin = new byte[bufSize]; + in.read(buffin); + in.close(); + System.out.println("End read:" + new String(buffin)); + } +} diff --git a/examples/src/main/java/alluxio/examples/AlluxioHDFS.java b/examples/src/main/java/alluxio/examples/AlluxioHDFS.java new file mode 100644 index 000000000000..384dae4e8d31 --- /dev/null +++ b/examples/src/main/java/alluxio/examples/AlluxioHDFS.java @@ -0,0 +1,61 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.examples; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Example program to use Alluxio Hadoop Compatible API to read and write files. + * + */ +public class AlluxioHDFS +{ + /** + * Entry point for the {@link AlluxioHDFS} program. + * + * @param args command-line arguments + */ + public static void main(String[] args) throws + java.io.IOException + { + final int bufSize = alluxio.Constants.KB * 4; + System.out.println("Starting write"); + + Configuration conf = new Configuration(); + conf.set("fs.alluxio.impl", "alluxio.hadoop.FileSystem"); + conf.set("fs.AbstractFileSystem.alluxio.impl", "alluxio.hadoop.AlluxioFileSystem"); + conf.set("fs.defaultFS", "alluxio://localhost:19998"); + + FileSystem fs = FileSystem.get(conf); + + String filename = "/AlluxioHDFS.txt"; + + FSDataOutputStream out = fs.create(new Path(filename)); + byte[] buffout = new byte[bufSize]; + for (int i = 0; i < bufSize; i++) { + buffout[i] = (byte) ('a' + i % 26); + } + out.write(buffout); + out.close(); + + FSDataInputStream in = fs.open(new Path(filename)); + byte[] buffin = new byte[bufSize]; + in.read(buffin); + in.close(); + System.out.println("Got: " + (new String(buffin))); + System.out.println("End write"); + } +} diff --git a/examples/src/main/java/alluxio/examples/HDFS.java b/examples/src/main/java/alluxio/examples/HDFS.java new file mode 100644 index 000000000000..c86e5cf10596 --- /dev/null +++ b/examples/src/main/java/alluxio/examples/HDFS.java @@ -0,0 +1,60 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.examples; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Example program to use HDFS API to read and write files. + * + */ +public class HDFS +{ + /** + * Entry point for the {@link HDFS} program. + * + * @param args command-line arguments + */ + public static void main(String[] args) throws + java.io.IOException + { + final int bufSize = alluxio.Constants.KB * 4; + System.out.println("Starting write"); + + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", "hdfs://localhost:9000"); + conf.set("dfs.replication", "1"); + + FileSystem fs = FileSystem.get(conf); + + String filename = "/HDFS.txt"; + + FSDataOutputStream out = fs.create(new Path(filename)); + byte[] buffout = new byte[bufSize]; + for (int i = 0; i < bufSize; i++) { + buffout[i] = (byte) ('a' + i % 26); + } + out.write(buffout); + out.close(); + + FSDataInputStream in = fs.open(new Path(filename)); + byte[] buffin = new byte[bufSize]; + in.read(buffin); + in.close(); + System.out.println("Got: " + (new String(buffin))); + System.out.println("End write"); + } +} From 379b16391403946543e5d2a756da1ca821805f71 Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Tue, 31 Jan 2023 18:25:40 -0800 Subject: [PATCH 094/334] [DOCFIX] Fix links to java file examples in Java-API.md relative links within the codebase do not render to usable links when publishing to our documentation website. 
`[description](/path/to/file)` will render as `https://docs.alluxio.io/path/to/file` replace with the full github url to the corresponding file in master branch instead pr-link: Alluxio/alluxio#16834 change-id: cid-6ad346b38e0e2ff0c8018105930fae64261cd58b --- docs/en/api/Java-API.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/api/Java-API.md b/docs/en/api/Java-API.md index 74d70523be19..f2a06be954f4 100644 --- a/docs/en/api/Java-API.md +++ b/docs/en/api/Java-API.md @@ -337,6 +337,6 @@ org.apache.orc.Reader orc = org.apache.orc.OrcFile.createReader( ### Examples in Source Code There are several example Java programs. They are: -* [`Alluxio native API`](/examples/src/main/java/alluxio/examples/AlluxioFS.java) -* [`Alluxio Hadoop-Compatible File System`](/examples/src/main/java/alluxio/examples/AlluxioHDFS.java) -* [`HDFS`](/examples/src/main/java/alluxio/examples/HDFS.java) +* [`Alluxio native API`](https://github.com/Alluxio/alluxio/tree/master/examples/src/main/java/alluxio/examples/AlluxioFS.java) +* [`Alluxio Hadoop-Compatible File System`](https://github.com/Alluxio/alluxio/tree/master/examples/src/main/java/alluxio/examples/AlluxioHDFS.java) +* [`HDFS`](https://github.com/Alluxio/alluxio/tree/master/examples/src/main/java/alluxio/examples/HDFS.java) From ee9aa24627b754477b81712c662e05a34d959c0a Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 1 Feb 2023 21:04:19 +0800 Subject: [PATCH 095/334] [DOCFIX] Fix some cn client-metric table wording ### What changes are proposed in this pull request? Some wording in cn client-metric table ### Why are the changes needed? Clarified some metric descriptions. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#16799 change-id: cid-a2e3aab8f7e92eb23380729e845902477fb47c06 --- docs/_data/table/cn/client-metrics.yml | 104 ++++++++++++------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/docs/_data/table/cn/client-metrics.yml b/docs/_data/table/cn/client-metrics.yml index 2b8b59f6790d..6d2b3ded1ba3 100644 --- a/docs/_data/table/cn/client-metrics.yml +++ b/docs/_data/table/cn/client-metrics.yml @@ -1,120 +1,120 @@ Client.BlockMasterClientCount: 'BlockMasterClientPool 中实例数量' Client.BlockReadChunkRemote: - '这个客户端从远程 Alluxio worker 读取数据chunk数量。当 alluxio.user.block.read.metrics.enabled 设置为 true 时,才会记录此指标' + '该客户端从远程 Alluxio worker 读取数据chunk数量。当 alluxio.user.block.read.metrics.enabled 设置为 true 时,才会记录此指标' Client.BlockWorkerClientCount: 'BlockWorkerClientPool 中实例数量' Client.BusyExceptionCount: '观察到的 BusyException 数量' Client.BytesReadLocal: - '这个客户端短路读取的总字节数' + '该客户端短路读取的总字节数' Client.BytesReadLocalThroughput: - '这个客户端短路读取的字节吞吐量' + '该客户端短路读取的字节吞吐量' Client.BytesWrittenLocal: - '客户端短路写入 Alluxio 缓存的字节总数' + '该客户端短路写入 Alluxio 缓存的字节总数' Client.BytesWrittenLocalThroughput: - '客户端短路写入 Alluxio 缓存的字节吞吐量' + '该客户端短路写入 Alluxio 缓存的字节吞吐量' Client.BytesWrittenUfs: - '这个客户端写入 UFS 的字节数' + '该客户端写入 UFS 的字节数' Client.CacheBytesDiscarded: - '客户端缓存丢弃的总字节数' + '该客户端缓存丢弃的总字节数' Client.CacheBytesEvicted: - '客户端缓存驱逐的总字节数' + '该客户端缓存驱逐的总字节数' Client.CacheBytesReadCache: - '从客户端缓存读的总字节数' + '从该客户端缓存读的总字节数' Client.CacheBytesReadExternal: - '由于客户端缓存未命中从 Alluxio 集群读取的总字节数。chunk read 可能导致这个数字小于 Client.CacheBytesReadExternal' + '由于该客户端缓存未命中从 Alluxio 集群读取的总字节数。chunk read 可能导致这个数字小于 Client.CacheBytesReadExternal' Client.CacheBytesReadInStreamBuffer: - '从客户端缓存的输入流缓冲区中读取的总字节数' + '从该客户端缓存的输入流缓冲区中读取的总字节数' Client.CacheBytesRequestedExternal: '引起缓存未命中的用户读请求总字节数。这个数字可能会比 Client.CacheBytesReadExternal 小,因为它可能被分成多个块读取' Client.CacheBytesWrittenCache: - '向客户端缓存写入的总字节数' + '向该客户端缓存写入的总字节数' Client.CacheCleanErrors: - '为了初始化新缓存时清理已存在缓存路径的失败总数' + '该客户端为了初始化新缓存时清理已存在缓存路径的失败总数' 
Client.CacheCleanupGetErrors: - '清理失败内存读取失败总数' + '该客户端清理失败内存读取失败总数' Client.CacheCleanupPutErrors: - '清理失败内存写入失败总数' + '该客户端清理失败内存写入失败总数' Client.CacheCreateErrors: - '在客户端缓存中创建缓存的失败总数' + '在该客户端缓存中创建缓存的失败总数' Client.CacheDeleteErrors: - '在客户端缓存中删除缓存数据的失败总数' + '在该客户端缓存中删除缓存数据的失败总数' Client.CacheDeleteFromStoreErrors: - '删除页的失败总数' + '该客户端删除页的失败总数' Client.CacheDeleteNonExistingPageErrors: - '由于页缺失导致删除页失败的总数' + '该客户端由于页缺失导致删除页失败的总数' Client.CacheDeleteNotReadyErrors: - '由于缓存未就绪删除页失败的总数' + '该客户端由于缓存未就绪删除页失败的总数' Client.CacheGetErrors: - '从客户端缓存中获取缓存数据失败总数' + '从该客户端缓存中获取缓存数据失败总数' Client.CacheGetNotReadyErrors: - '由于缓存未就绪获取页失败的总数' + '该客户端由于缓存未就绪获取页失败的总数' Client.CacheGetStoreReadErrors: - '由于从页存储读取失败导致客户端缓存中获取缓存数据失败的次数' + '该客户端由于从页存储读取失败导致客户端缓存中获取缓存数据失败的次数' Client.CacheHitRate: '缓存命中率:(# 从缓存读取的字节数)/(# 请求的字节数)' Client.CachePageReadCacheTimeNanos: - '客户端缓存命中时读取页面时间(ns)' + '该客户端缓存命中时读取页面时间(ns)' Client.CachePageReadExternalTimeNanos: - '当缓存未命中时,从外部源读取数据所花费时间(ns)' + '该客户端当缓存未命中时,从外部源读取数据所花费时间(ns)' Client.CachePages: - '客户端缓存中的总页数' + '该客户端缓存中的总页数' Client.CachePagesDiscarded: - '恢复页存储时丢失页的总数' + '该客户端恢复页存储时丢失页的总数' Client.CachePagesEvicted: - '从客户端缓存中驱逐页的总数' + '从该客户端缓存中驱逐页的总数' Client.CachePutAsyncRejectionErrors: - '客户端缓存中放置缓存数据时,由于异步写队列注入失败而导致的失败次数' + '该客户端缓存中放置缓存数据时,由于异步写队列注入失败而导致的失败次数' Client.CachePutBenignRacingErrors: - '由于驱逐竞争而导致的缓存页添加失败的次数。这个错误是良性的' + '该客户端由于驱逐竞争而导致的缓存页添加失败的次数。这个错误是良性的' Client.CachePutErrors: - '向客户端缓存中放置缓存数据的失败次数' + '向该客户端缓存中放置缓存数据的失败次数' Client.CachePutEvictionErrors: - '由于驱逐失败而导致的缓存页添加失败的次数。这个错误是良性的' + '该客户端由于驱逐失败而导致的缓存页添加失败的次数。这个错误是良性的' Client.CachePutInsufficientSpaceErrors: - '由于在驱逐后空间不足导致的将缓存数据放入客户端缓存时的失败次数' + '该客户端由于在驱逐后空间不足导致的将缓存数据放入客户端缓存时的失败次数' Client.CachePutNotReadyErrors: - '由于缓存不能准备好添加页,添加页失败的次数' + '该客户端由于缓存不能准备好添加页,添加页失败的次数' Client.CachePutStoreDeleteErrors: - '在页存储中删除失败导致的缓存数据放置失败的次数' + '该客户端在页存储中删除失败导致的缓存数据放置失败的次数' Client.CachePutStoreWriteErrors: - '由于向页面存储写入失败而导致的将缓存数据放入客户端缓存中失败的次数' + '该客户端由于向页面存储写入失败而导致的将缓存数据放入客户端缓存中失败的次数' 
 Client.CachePutStoreWriteNoSpaceErrors:
-  '未达到缓存容量上限但磁盘已满时将缓存数据放入客户端缓存时失败的次数。如果低估写入数据的存储开销比例,这种情况就可能会发生'
+  '该客户端未达到缓存容量上限但磁盘已满时将缓存数据放入客户端缓存时失败的次数。如果低估写入数据的存储开销比例,这种情况就可能会发生'
 Client.CacheShadowCacheBytes:
-  '客户端 shadow cache 的字节数'
+  '该客户端 shadow cache 的字节数'
 Client.CacheShadowCacheBytesHit:
-  '客户端 shadow cache 命中的字节数'
+  '该客户端 shadow cache 命中的字节数'
 Client.CacheShadowCacheBytesRead:
-  '从客户端 shadow cache 读取的字节数'
+  '从该客户端 shadow cache 读取的字节数'
 Client.CacheShadowCacheFalsePositiveRatio:
-  '正在使用的工作集布隆过滤器犯错的概率。该值为 0-100。如果太高,则需要分配更多空间'
+  '该客户端正在使用的工作集布隆过滤器犯错的概率。该值为 0-100。如果太高,则需要分配更多空间'
 Client.CacheShadowCachePages:
-  '客户端 shadow cache 中页的数量'
+  '该客户端 shadow cache 中页的数量'
 Client.CacheShadowCachePagesHit:
-  '客户端 shadow cache 中页的命中次数'
+  '该客户端 shadow cache 中页的命中次数'
 Client.CacheShadowCachePagesRead:
-  '从客户端 shadow cache 中读取页的数量'
+  '从该客户端 shadow cache 中读取页的数量'
 Client.CacheSpaceAvailable:
-  '客户端缓存中可用字节数'
+  '该客户端缓存中可用字节数'
 Client.CacheSpaceUsed:
-  '客户端缓存使用字节数'
+  '该客户端缓存使用字节数'
 Client.CacheSpaceUsedCount:
-  '客户端缓存用作计数器的字节数量'
+  '该客户端缓存用作计数器的字节数量'
 Client.CacheState:
   '缓存状态:0(不在使用中),1(只读),2(读写)'
 Client.CacheStoreDeleteTimeout:
-  '从页存储中删除页超时次数'
+  '该客户端从页存储中删除页超时次数'
 Client.CacheStoreGetTimeout:
-  '从页存储中读取页超时次数'
+  '该客户端从页存储中读取页超时次数'
 Client.CacheStorePutTimeout:
-  '向页存储中写入新页超时次数'
+  '该客户端向页存储中写入新页超时次数'
 Client.CacheStoreThreadsRejected:
-  '向线程池提交任务时拒绝 I/O 线程的次数,可能是由于本地文件系统无响应。'
+  '该客户端向线程池提交任务时拒绝 I/O 线程的次数,可能是由于本地文件系统无响应。'
 Client.DefaultHiveClientCount:
   'DefaultHiveClientPool 中实例数量'
 Client.FileSystemMasterClientCount:
   'FileSystemMasterClientPool 中实例数量'
 Client.MetadataCacheSize:
-  '客户端被缓存的文件和目录的元数据总数。只在文件系统为 alluxio.client.file.MetadataCachingBaseFileSystem 时有效'
+  '该客户端被缓存的文件和目录的元数据总数。只在文件系统为 alluxio.client.file.MetadataCachingBaseFileSystem 时有效'

From 530e720a30c51c58f1f2e9651a4543ac1315d01f Mon Sep 17 00:00:00 2001
From: Chunxu Tang <8018679+ChunxuTang@users.noreply.github.com>
Date: Wed, 1 Feb 2023 15:33:55 -0800
Subject: [PATCH 096/334] Add the two-choice random cache eviction policy

### What
changes are proposed in this pull request? The PR adds the two-choice random cache eviction policy. The algorithm selects two random page IDs and evicts the one least recently used. ### Why are the changes needed? From some evaluation (https://danluu.com/2choices-eviction/), the two-choice random policy has competitive performance compared with LRU. ### Does this PR introduce any user facing changes? Users can configure the new cache eviction policy. pr-link: Alluxio/alluxio#16828 change-id: cid-f9a140208b3a6938ddc70766abfb3d9f90c722f0 --- .../cache/evictor/TwoChoiceRandomEvictor.java | 94 +++++++++++++++++++ .../cache/TwoChoiceRandomEvictorTest.java | 81 ++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java create mode 100644 core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java b/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java new file mode 100644 index 000000000000..8778431f2204 --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/evictor/TwoChoiceRandomEvictor.java @@ -0,0 +1,94 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.file.cache.evictor; + +import alluxio.client.file.cache.PageId; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.function.Predicate; +import javax.annotation.Nullable; +import javax.annotation.concurrent.ThreadSafe; + +/** + * Two Choice Random client-side cache eviction policy. + * It selects two random page IDs and evicts the one least-recently used. + */ +@ThreadSafe +public class TwoChoiceRandomEvictor implements CacheEvictor { + private final Map mCache = Collections.synchronizedMap(new HashMap<>()); + + /** + * Constructor. + * @param options + */ + public TwoChoiceRandomEvictor(CacheEvictorOptions options) { + } + + @Override + public void updateOnGet(PageId pageId) { + mCache.put(pageId, Instant.now().toEpochMilli()); + } + + @Override + public void updateOnPut(PageId pageId) { + mCache.put(pageId, Instant.now().toEpochMilli()); + } + + @Override + public void updateOnDelete(PageId pageId) { + mCache.remove(pageId); + } + + @Nullable + @Override + public PageId evict() { + synchronized (mCache) { + if (mCache.isEmpty()) { + return null; + } + + // TODO(chunxu): improve the performance here + List keys = new ArrayList<>(mCache.keySet()); + Random rand = new Random(); + PageId key1 = keys.get(rand.nextInt(keys.size())); + PageId key2 = keys.get(rand.nextInt(keys.size())); + if (mCache.get(key1) < mCache.get(key2)) { + return key1; + } + return key2; + } + } + + @Nullable + @Override + public PageId evictMatching(Predicate criterion) { + synchronized (mCache) { + for (PageId candidate : mCache.keySet()) { + if (criterion.test(candidate)) { + return candidate; + } + } + return null; + } + } + + @Override + public void reset() { + mCache.clear(); + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java 
b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java new file mode 100644 index 000000000000..81248c9cd859 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java @@ -0,0 +1,81 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.file.cache; + +import alluxio.client.file.cache.evictor.CacheEvictorOptions; +import alluxio.client.file.cache.evictor.TwoChoiceRandomEvictor; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for the {@link TwoChoiceRandomEvictor} class. + */ +public class TwoChoiceRandomEvictorTest { + private TwoChoiceRandomEvictor mEvictor; + private final PageId mFirst = new PageId("1L", 2L); + private final PageId mSecond = new PageId("3L", 4L); + private final PageId mThird = new PageId("5L", 6L); + + /** + * Sets up the instances. 
+ */
+  @Before
+  public void before() {
+    mEvictor = new TwoChoiceRandomEvictor(new CacheEvictorOptions());
+  }
+
+  @Test
+  public void evictGetOrder() {
+    mEvictor.updateOnGet(mFirst);
+    Assert.assertEquals(mFirst, mEvictor.evict());
+    mEvictor.updateOnGet(mSecond);
+    Assert.assertEquals(mSecond, mEvictor.evict());
+  }
+
+  @Test
+  public void evictPutOrder() {
+    mEvictor.updateOnPut(mFirst);
+    Assert.assertEquals(mFirst, mEvictor.evict());
+    mEvictor.updateOnPut(mSecond);
+    mEvictor.updateOnPut(mFirst);
+    PageId evictedPage = mEvictor.evict();
+    Assert.assertTrue(evictedPage.equals(mFirst) || evictedPage.equals(mSecond));
+  }
+
+  @Test
+  public void evictAfterDelete() {
+    mEvictor.updateOnPut(mFirst);
+    mEvictor.updateOnPut(mSecond);
+    mEvictor.updateOnPut(mThird);
+    mEvictor.updateOnDelete(mSecond);
+    mEvictor.updateOnDelete(mThird);
+    Assert.assertEquals(mFirst, mEvictor.evict());
+  }
+
+  @Test
+  public void evictEmpty() {
+    Assert.assertNull(mEvictor.evict());
+  }
+
+  @Test
+  public void evictAllGone() {
+    mEvictor.updateOnPut(mFirst);
+    mEvictor.updateOnPut(mSecond);
+    mEvictor.updateOnPut(mThird);
+    mEvictor.updateOnDelete(mFirst);
+    mEvictor.updateOnDelete(mSecond);
+    mEvictor.updateOnDelete(mThird);
+    Assert.assertNull(mEvictor.evict());
+  }
+}

From 325f36aa0319fc2ea65fb3a520bbe7cacfc618d7 Mon Sep 17 00:00:00 2001
From: elega <445092967@qq.com>
Date: Thu, 2 Feb 2023 20:08:29 +0800
Subject: [PATCH 097/334] Support gRPC on standby masters

### What changes are proposed in this pull request?

Implemented a RpcServerStandbyGrpcService which runs gRPC server on both standby and primary masters.

### Why are the changes needed?

We are going to implement a feature which allows workers to register with all masters to speed up the master failover process. This requires standby masters to enable gRPC servers so that the workers can make RPC calls to them. This PR made these changes.

### Does this PR introduce any user facing changes?
N/A pr-link: Alluxio/alluxio#16839 change-id: cid-743160dea3f42872555b48d9a62125b2740962a3 --- .../main/java/alluxio/conf/PropertyKey.java | 9 +++ .../master/PollingMasterInquireClient.java | 7 +- .../src/main/java/alluxio/master/Master.java | 13 ++++ .../master/service/rpc/RpcServerService.java | 3 + .../rpc/RpcServerStandbyGrpcService.java | 78 +++++++++++++++++++ .../master/AlluxioMasterProcessTest.java | 27 +++++++ .../rpc/RpcServerStandbyGrpcServiceTest.java | 76 ++++++++++++++++++ 7 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java create mode 100644 core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 693661aa3d09..051487eecec2 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -3833,6 +3833,13 @@ public String toString() { .setDescription("Whether a standby master runs a web server") .setScope(Scope.SERVER) .build(); + public static final PropertyKey STANDBY_MASTER_GRPC_ENABLED = + booleanBuilder(Name.STANDBY_MASTER_GRPC_ENABLED) + .setDefaultValue(false) + .setDescription("Whether a standby master runs a grpc server") + .setScope(Scope.ALL) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .build(); // // Throttle @@ -8119,6 +8126,8 @@ public static final class Name { "alluxio.standby.master.metrics.sink.enabled"; public static final String STANDBY_MASTER_WEB_ENABLED = "alluxio.standby.master.web.enabled"; + public static final String STANDBY_MASTER_GRPC_ENABLED = + "alluxio.standby.master.grpc.enabled"; // // Worker related properties diff --git a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java 
b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java index 96875ec915c0..a1939fb98e11 100644 --- a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java +++ b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java @@ -18,6 +18,7 @@ import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.CancelledException; import alluxio.exception.status.DeadlineExceededException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.GetServiceVersionPRequest; import alluxio.grpc.GrpcChannel; @@ -140,7 +141,11 @@ private InetSocketAddress getAddress() { LOG.debug("Timeout while connecting to {}", address); } catch (CancelledException e) { LOG.debug("Cancelled while connecting to {}", address); - } catch (AlluxioStatusException e) { + } catch (NotFoundException e) { + // If the gRPC server is enabled but the metadata service isn't enabled, + // try the next master address. + LOG.debug("Meta service rpc endpoint not found on {}. {}", address, e); + } catch (AlluxioStatusException e) { LOG.error("Error while connecting to {}. {}", address, e); // Breaking the loop on non filtered error. break; diff --git a/core/server/common/src/main/java/alluxio/master/Master.java b/core/server/common/src/main/java/alluxio/master/Master.java index 1e475973c7df..13858c22ae92 100644 --- a/core/server/common/src/main/java/alluxio/master/Master.java +++ b/core/server/common/src/main/java/alluxio/master/Master.java @@ -13,9 +13,14 @@ import alluxio.Server; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; import alluxio.master.journal.JournalContext; import alluxio.master.journal.Journaled; +import java.util.Collections; +import java.util.Map; + /** * This interface contains common operations for all masters. 
*/ @@ -29,4 +34,12 @@ public interface Master extends Journaled, Server { * @return a master context */ MasterContext getMasterContext(); + + /** + * @return a map from service names to gRPC services that serve RPCs for this master, + * if the master is a standby master. + */ + default Map getStandbyServices() { + return Collections.emptyMap(); + } } diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java index aa2f3006b8df..2617b0a666e8 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java @@ -219,6 +219,9 @@ public static RpcServerService create( InetSocketAddress bindAddress, MasterProcess masterProcess, MasterRegistry masterRegistry) { + if (Configuration.getBoolean(PropertyKey.STANDBY_MASTER_GRPC_ENABLED)) { + return new RpcServerStandbyGrpcService(bindAddress, masterProcess, masterRegistry); + } return new RpcServerService(bindAddress, masterProcess, masterRegistry); } } diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java new file mode 100644 index 000000000000..075dfc7fc739 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java @@ -0,0 +1,78 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.service.rpc; + +import alluxio.master.Master; +import alluxio.master.MasterProcess; +import alluxio.master.MasterRegistry; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetSocketAddress; + +/** + * Created by {@link RpcServerService.Factory}. + * Manages the behavior of the master's rpc service. The grpc server is always on. + * When the promotion/demotion happens, the rpc service will be stopped and restarted. + * The new started grpc service will serve gRPC endpoints based on the node state (PRIMARY/STANDBY). + * No rejecting server is deployed. + */ +public class RpcServerStandbyGrpcService extends RpcServerService { + protected static final Logger LOG = LoggerFactory.getLogger(RpcServerStandbyGrpcService.class); + + private boolean mIsPromoted = false; + + protected RpcServerStandbyGrpcService( + InetSocketAddress bindAddress, + MasterProcess masterProcess, + MasterRegistry masterRegistry + ) { + super(bindAddress, masterProcess, masterRegistry); + } + + @Override + public synchronized void start() { + LOG.info("Starting {}", this.getClass().getSimpleName()); + startGrpcServer(Master::getStandbyServices); + } + + @Override + public synchronized void stop() { + stopGrpcServer(); + stopRpcExecutor(); + mIsPromoted = false; + } + + @Override + public synchronized void promote() { + Preconditions.checkState(!mIsPromoted, "double promotion is not allowed"); + LOG.info("Promoting {}", this.getClass().getSimpleName()); + stopGrpcServer(); + stopRpcExecutor(); + waitForFree(); + startGrpcServer(Master::getServices); + mIsPromoted = true; + } + + @Override + public synchronized void demote() { + Preconditions.checkState(mIsPromoted, "double demotion is not allowed"); + LOG.info("Demoting {}", this.getClass().getSimpleName()); + stopGrpcServer(); + 
stopRpcExecutor(); + waitForFree(); + startGrpcServer(Master::getStandbyServices); + mIsPromoted = false; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java index 3740f135f9d7..4c1bd0b36b98 100644 --- a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java +++ b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java @@ -256,6 +256,33 @@ public void restoreFromBackupLocal() throws Exception { startStopTest(master); } + @Test + public void startStopStandbyStandbyServer() throws Exception { + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); + AlluxioMasterProcess master = + new AlluxioMasterProcess(new NoopJournalSystem(), new AlwaysStandbyPrimarySelector()); + master.registerService( + RpcServerService.Factory.create( + master.getRpcBindAddress(), master, master.getRegistry())); + master.registerService(WebServerService.Factory.create(master.getWebBindAddress(), master)); + master.registerService(MetricsService.Factory.create()); + + Thread t = new Thread(() -> { + try { + master.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + t.start(); + final int TIMEOUT_MS = 10_000; + master.waitForGrpcServerReady(TIMEOUT_MS); + startStopTest(master, + true, + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_WEB_ENABLED), + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_METRICS_SINK_ENABLED)); + } + private void startStopTest(AlluxioMasterProcess master) throws Exception { startStopTest(master, true, true, true); } diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java new file mode 100644 index 000000000000..998a651ba359 --- /dev/null +++ 
b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerStandbyGrpcServiceTest.java @@ -0,0 +1,76 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.service.rpc; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.AlluxioMasterProcess; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +/** + * Test for RpcServerStandbyGrpcServiceTest. + */ +@RunWith(PowerMockRunner.class) +@PrepareForTest(AlluxioMasterProcess.class) +public class RpcServerStandbyGrpcServiceTest extends RpcServerServiceTestBase { + @Before + public void setUp() { + Configuration.reloadProperties(); + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); + super.setUp(); + } + + @Test + public void primaryOnlyTest() { + RpcServerService service = + RpcServerService.Factory.create(mRpcAddress, mMasterProcess, mRegistry); + Assert.assertTrue(waitForFree()); + + Assert.assertFalse(service.isServing()); + service.start(); + // when standby master is enabled, gRPC server is always on even if it's standby. 
+ Assert.assertTrue(isGrpcBound()); + Assert.assertTrue(service.isServing()); + for (int i = 0; i < 5; i++) { + service.promote(); + Assert.assertTrue(service.isServing()); + Assert.assertTrue(isGrpcBound()); + service.demote(); + Assert.assertTrue(isGrpcBound()); + Assert.assertTrue(service.isServing()); + } + service.stop(); + Assert.assertFalse(service.isServing()); + Assert.assertFalse(isGrpcBound()); + } + + @Test + public void doubleStartRpcServer() { + RpcServerService service = + RpcServerService.Factory.create(mRpcAddress, mMasterProcess, mRegistry); + + service.start(); + service.promote(); + Assert.assertThrows("double promotion is not allowed", + IllegalStateException.class, service::promote); + + service.demote(); + Assert.assertThrows("double demotion is not allowed", + IllegalStateException.class, service::demote); + } +} From 506a0f4b8667ae7e6895ccfb88cbe28ed26edde7 Mon Sep 17 00:00:00 2001 From: Chunxu Tang <8018679+ChunxuTang@users.noreply.github.com> Date: Thu, 2 Feb 2023 13:02:54 -0800 Subject: [PATCH 098/334] Merge local cache invalidation ### What changes are proposed in this pull request? The PR cherry-picks @beinan's implementation of local cache invalidation to the master branch. 
pr-link: Alluxio/alluxio#16841 change-id: cid-5191eaf15bb8bf730db7653670ad4e6241ba31c6 --- .../client/file/cache/CacheManager.java | 9 ++++++++ .../client/file/cache/LocalCacheManager.java | 16 ++++++++++++++ .../alluxio/client/file/cache/PageInfo.java | 21 +++++++++++++++++++ .../file/cache/store/LocalPageStoreDir.java | 8 ++++++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java index 4f0e8f5c1fb4..2ca67855a047 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManager.java @@ -30,6 +30,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Predicate; import javax.annotation.concurrent.GuardedBy; /** @@ -297,4 +298,12 @@ default List getCachedPageIdsByFileId(String fileId, long fileLength) { * @return true if append was successful */ boolean append(PageId pageId, int appendAt, byte[] page, CacheContext cacheContext); + + /** + * Invalidate the pages that match the given predicate. 
+ * @param predicate + */ + default void invalidate(Predicate predicate) { + throw new UnsupportedOperationException(); + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java index f49541fbf45a..ff3dbe87cabc 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java @@ -50,6 +50,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Predicate; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -640,6 +641,21 @@ public List getCachedPageIdsByFileId(String fileId, long fileLength) { return pageIds; } + @Override + public void invalidate(Predicate predicate) { + mPageStoreDirs.forEach(dir -> { + try { + dir.scanPages(pageInfo -> { + if (pageInfo.isPresent() && predicate.test(pageInfo.get())) { + delete(pageInfo.get().getPageId()); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + @Override public void close() throws Exception { for (PageStoreDir pageStoreDir: mPageStoreDirs) { diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java b/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java index 067d4980cc85..5bbaf90b5a9a 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/PageInfo.java @@ -28,6 +28,7 @@ public class PageInfo { private final long mPageSize; private final CacheScope mCacheScope; private final PageStoreDir mLocalCacheDir; + private final long mCreatedTimestamp; /** * @param pageId page id @@ -46,10 +47,23 @@ public PageInfo(PageId pageId, long 
pageSize, PageStoreDir pageStoreDir) { */ public PageInfo(PageId pageId, long pageSize, CacheScope cacheScope, PageStoreDir pageStoreDir) { + this(pageId, pageSize, cacheScope, pageStoreDir, System.currentTimeMillis()); + } + + /** + * @param pageId page id + * @param pageSize page size in bytes + * @param cacheScope scope of this page + * @param pageStoreDir directory of this page + * @param createdTimestamp created time + */ + public PageInfo(PageId pageId, long pageSize, CacheScope cacheScope, + PageStoreDir pageStoreDir, long createdTimestamp) { mPageId = pageId; mPageSize = pageSize; mCacheScope = cacheScope; mLocalCacheDir = pageStoreDir; + mCreatedTimestamp = createdTimestamp; } /** @@ -80,6 +94,13 @@ public PageStoreDir getLocalCacheDir() { return mLocalCacheDir; } + /** + * @return the created time + */ + public long getCreatedTimestamp() { + return mCreatedTimestamp; + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java index 7426af1c9bf4..3d13ba544313 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStoreDir.java @@ -17,6 +17,7 @@ import alluxio.client.file.cache.PageInfo; import alluxio.client.file.cache.PageStore; import alluxio.client.file.cache.evictor.CacheEvictor; +import alluxio.client.quota.CacheScope; import com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -25,6 +26,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.FileTime; import java.util.Optional; import java.util.function.Consumer; import java.util.regex.Matcher; @@ -104,13 +106,17 @@ private Optional getPageInfo(Path path) { Optional pageId = getPageId(path); if (pageId.isPresent()) { long 
pageSize; + long createdTime; try { pageSize = Files.size(path); + FileTime creationTime = (FileTime) Files.getAttribute(path, "creationTime"); + createdTime = creationTime.toMillis(); } catch (IOException e) { LOG.error("Failed to get file size for " + path, e); return Optional.empty(); } - return Optional.of(new PageInfo(pageId.get(), pageSize, this)); + return Optional.of(new PageInfo(pageId.get(), + pageSize, CacheScope.GLOBAL, this, createdTime)); } return Optional.empty(); } From 6ca7d1533479d9c745a214a8239d60c2fda42f4a Mon Sep 17 00:00:00 2001 From: Chunxu Tang <8018679+ChunxuTang@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:22:34 -0800 Subject: [PATCH 099/334] Add cache TTL enforcement ### What changes are proposed in this pull request? Co-Author-By: Chen Liang This change adds a scheduled thread in LocalCacheManager, to periodically check the page create time, and delete pages that have passed the configurable time threshold. ### Why are the changes needed? Certain use cases require the cached data to be removed beyond certain longevity. This is mainly for compliance considerations. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#16843 change-id: cid-49289cc488680f44b39386a5f364d6d7449cd11a --- .../file/cache/CacheManagerOptions.java | 56 +++++++++++++++++++ .../client/file/cache/LocalCacheManager.java | 21 +++++++ .../main/java/alluxio/conf/PropertyKey.java | 27 +++++++++ 3 files changed, 104 insertions(+) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java index c55155758a6e..40c42f501423 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/CacheManagerOptions.java @@ -30,6 +30,9 @@ public class CacheManagerOptions { private long mPageSize; private List mPageStoreOptions; private boolean mQuotaEnabled; + private boolean mTtlEnabled; + private long mTtlCheckIntervalSeconds; + private long mTtlThresholdSeconds; /** * @param conf @@ -50,6 +53,10 @@ public static CacheManagerOptions create(AlluxioConfiguration conf) { .setMaxEvictionRetries(conf.getInt(PropertyKey.USER_CLIENT_CACHE_EVICTION_RETRIES)) .setPageSize(conf.getBytes(PropertyKey.USER_CLIENT_CACHE_PAGE_SIZE)) .setQuotaEnabled(conf.getBoolean(PropertyKey.USER_CLIENT_CACHE_QUOTA_ENABLED)) + .setTtlEnabled(conf.getBoolean(PropertyKey.USER_CLIENT_CACHE_TTL_ENABLED)) + .setTtlCheckIntervalSeconds( + conf.getLong(PropertyKey.USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS)) + .setTtlThresholdSeconds(conf.getLong(PropertyKey.USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS)) .setCacheEvictorOptions(cacheEvictorOptions) .setPageStoreOptions(PageStoreOptions.create(conf)); return options; @@ -113,6 +120,28 @@ public boolean isQuotaEnabled() { return mQuotaEnabled; } + /** + * @return if cache ttl is enabled + */ + public boolean isTtlEnabled() { + return mTtlEnabled; + } + + /** + * @return the check interval of ttl + */ + public long getTtlCheckIntervalSeconds() { + return mTtlCheckIntervalSeconds; + } + + 
/** + * + * @return the time threshold of cache ttl + */ + public long getTtlThresholdSeconds() { + return mTtlThresholdSeconds; + } + /** * @return max eviction retires */ @@ -213,4 +242,31 @@ public CacheManagerOptions setPageStoreOptions( mPageStoreOptions = pageStoreOptions; return this; } + + /** + * @param isTtlEnabled + * @return the updated options + */ + public CacheManagerOptions setTtlEnabled(boolean isTtlEnabled) { + mTtlEnabled = isTtlEnabled; + return this; + } + + /** + * @param checkIntervalSeconds + * @return the updated options + */ + public CacheManagerOptions setTtlCheckIntervalSeconds(long checkIntervalSeconds) { + mTtlCheckIntervalSeconds = checkIntervalSeconds; + return this; + } + + /** + * @param thresholdSeconds + * @return the updated options + */ + public CacheManagerOptions setTtlThresholdSeconds(long thresholdSeconds) { + mTtlThresholdSeconds = thresholdSeconds; + return this; + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java index ff3dbe87cabc..d9c73b3c2fa5 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java @@ -14,6 +14,8 @@ import static alluxio.client.file.cache.CacheManager.State.NOT_IN_USE; import static alluxio.client.file.cache.CacheManager.State.READ_ONLY; import static alluxio.client.file.cache.CacheManager.State.READ_WRITE; +import static java.util.concurrent.Executors.newScheduledThreadPool; +import static java.util.concurrent.TimeUnit.SECONDS; import alluxio.client.file.CacheContext; import alluxio.client.file.cache.store.ByteArrayTargetBuffer; @@ -44,6 +46,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; import 
java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -89,6 +92,8 @@ public class LocalCacheManager implements CacheManager { private final Optional mInitService; /** Executor service for execute the async cache tasks. */ private final Optional mAsyncCacheExecutor; + /** Executor service for execute the cache ttl check tasks. */ + private final Optional mTtlEnforcerExecutor; private final ConcurrentHashSet mPendingRequests; /** State of this cache. */ private final AtomicReference mState = new AtomicReference<>(); @@ -142,6 +147,21 @@ public static LocalCacheManager create(CacheManagerOptions options, mInitService = options.isAsyncRestoreEnabled() ? Optional.of(Executors.newSingleThreadExecutor()) : Optional.empty(); + if (options.isTtlEnabled()) { + mTtlEnforcerExecutor = Optional.of(newScheduledThreadPool(1)); + mTtlEnforcerExecutor.get().scheduleAtFixedRate(() -> + LocalCacheManager.this.invalidate(pageInfo -> { + try { + return System.currentTimeMillis() - pageInfo.getCreatedTimestamp() + >= options.getTtlThresholdSeconds() * 1000; + } catch (Exception ex) { + // In case of any exception, do not invalidate the cache + return false; + } + }), 0, options.getTtlCheckIntervalSeconds(), SECONDS); + } else { + mTtlEnforcerExecutor = Optional.empty(); + } Metrics.registerGauges(mCacheSize, mPageMetaStore); mState.set(READ_ONLY); Metrics.STATE.inc(); @@ -664,6 +684,7 @@ public void close() throws Exception { mPageMetaStore.reset(); mInitService.ifPresent(ExecutorService::shutdownNow); mAsyncCacheExecutor.ifPresent(ExecutorService::shutdownNow); + mTtlEnforcerExecutor.ifPresent(ExecutorService::shutdownNow); } /** diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 051487eecec2..cfb622902ad3 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ 
b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5925,6 +5925,27 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_ENABLED = + booleanBuilder(Name.USER_CLIENT_CACHE_TTL_ENABLED) + .setDefaultValue(false) + .setDescription("Whether to support cache quota.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS = + longBuilder(Name.USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS) + .setDefaultValue(3600) + .setDescription("TTL check interval time in seconds.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.CLIENT) + .build(); + public static final PropertyKey USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS = + longBuilder(Name.USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS) + .setDefaultValue(3600 * 3) + .setDescription("TTL threshold time in seconds.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_CLIENT_CACHE_SIZE = listBuilder(Name.USER_CLIENT_CACHE_SIZE) .setDefaultValue("512MB") @@ -8477,6 +8498,12 @@ public static final class Name { "alluxio.user.client.cache.page.size"; public static final String USER_CLIENT_CACHE_QUOTA_ENABLED = "alluxio.user.client.cache.quota.enabled"; + public static final String USER_CLIENT_CACHE_TTL_ENABLED = + "alluxio.user.client.cache.ttl.enabled"; + public static final String USER_CLIENT_CACHE_TTL_CHECK_INTERVAL_SECONDS = + "alluxio.user.client.cache.ttl.check.interval.seconds"; + public static final String USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS = + "alluxio.user.client.cache.ttl.threshold.seconds"; public static final String USER_CLIENT_CACHE_SIZE = "alluxio.user.client.cache.size"; public static final String USER_CLIENT_CACHE_STORE_OVERHEAD = From 
407cf1521b8612319df0ca80e5fa8716918c9505 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 3 Feb 2023 11:04:33 +0800 Subject: [PATCH 100/334] Fix updated config not notifying config registry Add update notify to master This issue introduced by #13514 pr-link: Alluxio/alluxio#16764 change-id: cid-36f2010233c3af95f28cf9cb7ac0a0a1963c86fe --- .../src/main/java/alluxio/master/meta/DefaultMetaMaster.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index f4901f649343..11b574b97889 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -20,6 +20,7 @@ import alluxio.conf.Configuration; import alluxio.conf.ConfigurationValueOptions; import alluxio.conf.PropertyKey; +import alluxio.conf.ReconfigurableRegistry; import alluxio.conf.Source; import alluxio.exception.AlluxioException; import alluxio.exception.status.NotFoundException; @@ -662,6 +663,9 @@ public Map updateConfiguration(Map propertiesMa } } LOG.debug("Update {} properties, succeed {}.", propertiesMap.size(), successCount); + if (successCount > 0) { + ReconfigurableRegistry.update(); + } return result; } From 7f6baa353b5e78576d6a61336b3a77ff2a8e8a77 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 3 Feb 2023 11:09:49 +0800 Subject: [PATCH 101/334] Support dynamic update HeartbeatThread tick interval Fix https://github.com/Alluxio/alluxio/issues/16733 pr-link: Alluxio/alluxio#16702 change-id: cid-500283d523f5cc5835cb889b85cf7804842a4af6 --- .../alluxio/heartbeat/HeartbeatThread.java | 60 +++++++++++++++---- .../alluxio/heartbeat/HeartbeatTimer.java | 9 +++ .../java/alluxio/heartbeat/SleepingTimer.java | 5 ++ .../heartbeat/HeartbeatThreadTest.java | 4 +- .../master/block/DefaultBlockMaster.java | 
4 +- .../master/file/DefaultFileSystemMaster.java | 35 ++++------- .../file/activesync/ActiveSyncManager.java | 2 +- .../master/meta/DefaultMetaMaster.java | 10 ++-- .../master/metrics/DefaultMetricsMaster.java | 2 +- .../throttle/DefaultThrottleMaster.java | 2 +- .../worker/block/DefaultBlockWorker.java | 6 +- .../main/java/alluxio/fuse/AlluxioFuse.java | 2 +- .../java/alluxio/master/job/JobMaster.java | 2 +- .../main/java/alluxio/worker/JobWorker.java | 2 +- .../table/transform/TransformManager.java | 2 +- 15 files changed, 93 insertions(+), 54 deletions(-) diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java index 82a0a504632f..2bb891d67c19 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java @@ -12,6 +12,8 @@ package alluxio.heartbeat; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Reconfigurable; +import alluxio.conf.ReconfigurableRegistry; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.UserState; import alluxio.util.CommonUtils; @@ -19,6 +21,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,14 +33,16 @@ * the JVM from exiting. 
*/ @NotThreadSafe -public final class HeartbeatThread implements Runnable { +public final class HeartbeatThread implements Runnable, Reconfigurable { private static final Logger LOG = LoggerFactory.getLogger(HeartbeatThread.class); private final String mThreadName; private final HeartbeatExecutor mExecutor; private final UserState mUserState; + private final Supplier mIntervalSupplier; private HeartbeatTimer mTimer; private AlluxioConfiguration mConfiguration; + private Status mStatus; /** * @param executorName the executor name defined in {@link HeartbeatContext} @@ -65,36 +70,39 @@ public static String generateThreadName(String executorName, String threadId) { * thread's name is a combination of executorName and threadId * @param executor identifies the heartbeat thread executor; an instance of a class that * implements the HeartbeatExecutor interface - * @param intervalMs Sleep time between different heartbeat + * @param intervalSupplier Sleep time between different heartbeat supplier * @param conf Alluxio configuration * @param userState the user state for this heartbeat thread */ public HeartbeatThread(String executorName, String threadId, HeartbeatExecutor executor, - long intervalMs, AlluxioConfiguration conf, UserState userState) { + Supplier intervalSupplier, AlluxioConfiguration conf, UserState userState) { mThreadName = generateThreadName(executorName, threadId); mExecutor = Preconditions.checkNotNull(executor, "executor"); Class timerClass = HeartbeatContext.getTimerClass(executorName); mTimer = CommonUtils.createNewClassInstance(timerClass, new Class[] {String.class, long.class}, - new Object[] {mThreadName, intervalMs}); + new Object[] {mThreadName, intervalSupplier.get()}); mConfiguration = conf; mUserState = userState; + mIntervalSupplier = intervalSupplier; + mStatus = Status.INIT; + ReconfigurableRegistry.register(this); } /** * Convenience method for * {@link - * #HeartbeatThread(String, String, HeartbeatExecutor, long, AlluxioConfiguration, 
UserState)} - * where threadId is null. + * #HeartbeatThread(String, String, HeartbeatExecutor, Supplier, AlluxioConfiguration, + * UserState)} where threadId is null. * * @param executorName the executor name that is one of those defined in {@link HeartbeatContext} * @param executor the heartbeat executor - * @param intervalMs the interval between heartbeats + * @param intervalSupplier the interval between heartbeats supplier * @param conf the Alluxio configuration * @param userState the user state for this heartbeat thread */ - public HeartbeatThread(String executorName, HeartbeatExecutor executor, long intervalMs, - AlluxioConfiguration conf, UserState userState) { - this(executorName, null, executor, intervalMs, conf, userState); + public HeartbeatThread(String executorName, HeartbeatExecutor executor, + Supplier intervalSupplier, AlluxioConfiguration conf, UserState userState) { + this(executorName, null, executor, intervalSupplier, conf, userState); } @Override @@ -114,7 +122,9 @@ public void run() { // Thread.interrupted() clears the interrupt status. Do not call interrupt again to clear it. while (!Thread.interrupted()) { // TODO(peis): Fix this. The current implementation consumes one thread even when ticking. 
+ mStatus = Status.WAITING; mTimer.tick(); + mStatus = Status.RUNNING; mExecutor.heartbeat(); } } catch (InterruptedException e) { @@ -122,6 +132,7 @@ public void run() { } catch (Exception e) { LOG.error("Uncaught exception in heartbeat executor, Heartbeat Thread shutting down", e); } finally { + mStatus = Status.STOPPED; mExecutor.close(); } } @@ -134,4 +145,33 @@ public void run() { public void updateIntervalMs(long intervalMs) { mTimer.setIntervalMs(intervalMs); } + + /** + * @return the status of current heartbeat thread + */ + public Status getStatus() { + return mStatus; + } + + @Override + public void update() { + if (mStatus == Status.STOPPED) { + ReconfigurableRegistry.unregister(this); + return; + } + long interval = mIntervalSupplier.get(); + if (interval != mTimer.getIntervalMs()) { + updateIntervalMs(interval); + } + } + + /** + * Enum representing the status of HeartbeatThread. + */ + public enum Status { + INIT, + WAITING, + RUNNING, + STOPPED, + } } diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java index e68738f4799b..96e9618af3ea 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java @@ -25,6 +25,15 @@ default void setIntervalMs(long intervalMs) { throw new UnsupportedOperationException("Setting interval is not supported"); } + /** + * Get the interval of HeartbeatTimer. + * + * @return the interval of this HeartbeatTimer + */ + default long getIntervalMs() { + throw new UnsupportedOperationException("Getting interval is not supported"); + } + /** * Waits until next heartbeat should be executed. 
* diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index 627ef78dfd56..d6d4ad2589ab 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -68,6 +68,11 @@ public void setIntervalMs(long intervalMs) { mIntervalMs = intervalMs; } + @Override + public long getIntervalMs() { + return mIntervalMs; + } + /** * Enforces the thread waits for the given interval between consecutive ticks. * diff --git a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java index 5676fbe05d1c..5d09135dc7ea 100644 --- a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java @@ -139,8 +139,8 @@ public Void call() throws Exception { try (ManuallyScheduleHeartbeat.Resource r = new ManuallyScheduleHeartbeat.Resource(Arrays.asList(mThreadName))) { DummyHeartbeatExecutor executor = new DummyHeartbeatExecutor(); - HeartbeatThread ht = new HeartbeatThread(mThreadName, executor, 1, Configuration.global(), - UserState.Factory.create(Configuration.global())); + HeartbeatThread ht = new HeartbeatThread(mThreadName, executor, () -> 1L, + Configuration.global(), UserState.Factory.create(Configuration.global())); // Run the HeartbeatThread. 
mExecutorService.submit(ht); diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 2897a1875aaa..99adf9859842 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -483,7 +483,7 @@ public void start(Boolean isLeader) throws IOException { if (isLeader) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } @@ -491,7 +491,7 @@ HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatE getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_WORKER_REGISTER_SESSION_CLEANER, new WorkerRegisterStreamGCExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT), + () -> Configuration.getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT), Configuration.global(), mMasterContext.getUserState())); } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index cefd5fc72d38..ef392be52bca 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -27,8 +27,6 @@ import alluxio.collections.PrefixList; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.conf.Reconfigurable; -import alluxio.conf.ReconfigurableRegistry; import alluxio.exception.AccessControlException; import 
alluxio.exception.AlluxioException; import alluxio.exception.BlockInfoException; @@ -235,7 +233,7 @@ */ @NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1664) public class DefaultFileSystemMaster extends CoreMaster - implements FileSystemMaster, DelegatingJournaled, Reconfigurable { + implements FileSystemMaster, DelegatingJournaled { private static final Logger LOG = LoggerFactory.getLogger(DefaultFileSystemMaster.class); private static final Set> DEPS = ImmutableSet.of(BlockMaster.class); @@ -694,31 +692,31 @@ public void start(Boolean isPrimary) throws IOException { if (blockIntegrityCheckInterval > 0) { // negative or zero interval implies disabled getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_BLOCK_INTEGRITY_CHECK, - new BlockIntegrityChecker(this), blockIntegrityCheckInterval, + new BlockIntegrityChecker(this), () -> + Configuration.getMs(PropertyKey.MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_TTL_CHECK, new InodeTtlChecker(this, mInodeTree), - Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOST_FILES_DETECTION, new LostFileDetector(this, mBlockMaster, mInodeTree), - Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL), Configuration.global(), mMasterContext.getUserState())); mReplicationCheckHeartbeatThread = new HeartbeatThread( HeartbeatContext.MASTER_REPLICATION_CHECK, new alluxio.master.file.replication.ReplicationChecker(mInodeTree, mBlockMaster, mSafeModeManager, mJobMasterClientPool), - Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS), 
+ () -> Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS), Configuration.global(), mMasterContext.getUserState()); - ReconfigurableRegistry.register(this); getExecutorService().submit(mReplicationCheckHeartbeatThread); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_SCHEDULER, new PersistenceScheduler(), - Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS), Configuration.global(), mMasterContext.getUserState())); mPersistCheckerPool = new java.util.concurrent.ThreadPoolExecutor(PERSIST_CHECKER_POOL_THREADS, @@ -729,12 +727,12 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_CHECKER, new PersistenceChecker(), - Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_METRICS_TIME_SERIES, new TimeSeriesRecorder(), - Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); @@ -747,7 +745,7 @@ public void start(Boolean isPrimary) throws IOException { if (Configuration.getBoolean(PropertyKey.UNDERFS_CLEANUP_ENABLED)) { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_UFS_CLEANUP, new UfsCleaner(this), - Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL), + () -> Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL), Configuration.global(), 
mMasterContext.getUserState())); } mAccessTimeUpdater.start(); @@ -797,7 +795,6 @@ public void close() throws IOException { Thread.currentThread().interrupt(); LOG.warn("Failed to wait for active sync executor to shut down."); } - ReconfigurableRegistry.unregister(this); } @Override @@ -3999,18 +3996,6 @@ InodeSyncStream.SyncStatus syncMetadata(RpcContext rpcContext, AlluxioURI path, return sync.sync(); } - @Override - public void update() { - if (mReplicationCheckHeartbeatThread != null) { - long newValue = Configuration.getMs( - PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS); - mReplicationCheckHeartbeatThread.updateIntervalMs( - newValue); - LOG.info("The interval of {} updated to {}", - HeartbeatContext.MASTER_REPLICATION_CHECK, newValue); - } - } - @FunctionalInterface interface PermissionCheckFunction { diff --git a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java index 1d038633fbb3..214c1ec72e67 100644 --- a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java +++ b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java @@ -262,7 +262,7 @@ public void launchPollingThread(long mountId, long txId) { ActiveSyncer syncer = new ActiveSyncer(mFileSystemMaster, this, mMountTable, mountId); Future future = getExecutor().submit( new HeartbeatThread(HeartbeatContext.MASTER_ACTIVE_UFS_SYNC, - syncer, (int) Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL), + syncer, () -> Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL), Configuration.global(), ServerUserState.global())); mPollerMap.put(mountId, future); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 11b574b97889..d6b09d746724 100644 --- 
a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -300,12 +300,12 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_MASTER_DETECTION, new LostMasterDetectionHeartbeatExecutor(), - (int) Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOG_CONFIG_REPORT_SCHEDULING, new LogConfigReportHeartbeatExecutor(), - (int) Configuration + () -> Configuration .getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL), Configuration.global(), mMasterContext.getUserState())); @@ -317,7 +317,7 @@ public void start(Boolean isPrimary) throws IOException { if (mJournalSpaceMonitor != null) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_JOURNAL_SPACE_MONITOR, mJournalSpaceMonitor, - Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } if (mState.getClusterID().equals(INVALID_CLUSTER_ID)) { @@ -330,7 +330,7 @@ public void start(Boolean isPrimary) throws IOException { && !Configuration.getBoolean(PropertyKey.TEST_MODE)) { getExecutorService().submit(new HeartbeatThread(HeartbeatContext.MASTER_UPDATE_CHECK, new UpdateChecker(this), - (int) Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } } else { @@ -345,7 +345,7 @@ public void start(Boolean isPrimary) throws IOException { .newBuilder(ClientContext.create(Configuration.global())).build()); 
getExecutorService().submit(new HeartbeatThread(HeartbeatContext.META_MASTER_SYNC, new MetaMasterSync(mMasterAddress, metaMasterClient), - (int) Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), Configuration.global(), mMasterContext.getUserState())); LOG.info("Standby master with address {} starts sending heartbeat to leader master.", mMasterAddress); diff --git a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java index c7af089dc355..bf65ad6d2449 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java @@ -180,7 +180,7 @@ public void start(Boolean isLeader) throws IOException { if (isLeader) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_CLUSTER_METRICS_UPDATER, new ClusterMetricsUpdater(), - Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL), + () -> Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } } diff --git a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java index 0c7385373f4a..70ee98d0b85c 100644 --- a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java +++ b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java @@ -109,7 +109,7 @@ public void start(Boolean isLeader) throws IOException { LOG.info("Starting {}", getName()); mThrottleService = getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_THROTTLE, mThrottleExecutor, - Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL), + () -> 
Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL), Configuration.global(), mMasterContext.getUserState())); LOG.info("{} is started", getName()); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 0a3b2c931c1d..033eb28a42e6 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -214,7 +214,7 @@ public void start(WorkerNetAddress address) throws IOException { new PinListSync(this, mFileSystemMasterClient)); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_PIN_LIST_SYNC, pinListSync, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), Configuration.global(), ServerUserState.global())); // Setup session cleaner @@ -227,7 +227,7 @@ public void start(WorkerNetAddress address) throws IOException { StorageChecker storageChecker = mResourceCloser.register(new StorageChecker()); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_STORAGE_HEALTH, storageChecker, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), Configuration.global(), ServerUserState.global())); } @@ -242,7 +242,7 @@ protected void setupBlockMasterSync() throws IOException { .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, - (int) Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), Configuration.global(), ServerUserState.global())); } diff --git 
a/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java index 6acdb91bb031..8ec0aa9048c8 100644 --- a/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java +++ b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java @@ -178,7 +178,7 @@ public static void main(String[] args) throws ParseException { if (fuseOptions.updateCheckEnabled()) { executor = Executors.newSingleThreadExecutor(); executor.submit(new HeartbeatThread(HeartbeatContext.FUSE_UPDATE_CHECK, - UpdateChecker.create(fuseOptions), Constants.DAY_MS, + UpdateChecker.create(fuseOptions), () -> Long.valueOf(Constants.DAY_MS), Configuration.global(), UserState.Factory.create(conf))); } try (FileSystem fs = FileSystem.Factory.create(fsContext, fuseOptions.getFileSystemOptions())) { diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index b19bed025b79..fc14d0935f02 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -195,7 +195,7 @@ public void start(Boolean isLeader) throws IOException { getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.JOB_MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), - (int) Configuration.getMs(PropertyKey.JOB_MASTER_LOST_WORKER_INTERVAL), + () -> Configuration.getMs(PropertyKey.JOB_MASTER_LOST_WORKER_INTERVAL), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("JOB_MASTER_AUDIT_LOG"); diff --git a/job/server/src/main/java/alluxio/worker/JobWorker.java b/job/server/src/main/java/alluxio/worker/JobWorker.java index 2f39e1d0d8f7..aec996509b95 100644 --- a/job/server/src/main/java/alluxio/worker/JobWorker.java +++ 
b/job/server/src/main/java/alluxio/worker/JobWorker.java @@ -107,7 +107,7 @@ public void start(WorkerNetAddress address) throws IOException { new HeartbeatThread(HeartbeatContext.JOB_WORKER_COMMAND_HANDLING, new CommandHandlingExecutor(mJobServerContext, taskExecutorManager, mJobMasterClient, address), - (int) Configuration.getMs(PropertyKey.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL), + () -> Configuration.getMs(PropertyKey.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL), Configuration.global(), ServerUserState.global())); } diff --git a/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java b/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java index 7b0991d6ddea..e5a24c5715be 100644 --- a/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java +++ b/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java @@ -135,7 +135,7 @@ public TransformManager( public void start(ExecutorService executorService, UserState userState) { executorService.submit( new HeartbeatThread(HeartbeatContext.MASTER_TABLE_TRANSFORMATION_MONITOR, new JobMonitor(), - Configuration.getMs(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL), + () -> Configuration.getMs(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL), Configuration.global(), userState)); } From 12f1e8b0ac8507ffa998791635e5d62b26d171b3 Mon Sep 17 00:00:00 2001 From: Chunxu Tang <8018679+ChunxuTang@users.noreply.github.com> Date: Mon, 6 Feb 2023 13:15:52 -0800 Subject: [PATCH 102/334] Fix the flaky test in the two-choice random cache eviction policy ### What changes are proposed in this pull request? Fix the flaky test in the two-choice random cache eviction policy ### Why are the changes needed? As the test is flaky, it will influence other PRs. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#16850 change-id: cid-71dd4af6c446f88be4187d11d7b9c6b6713ec677 --- .../alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java index 81248c9cd859..0e7686186164 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/TwoChoiceRandomEvictorTest.java @@ -40,7 +40,8 @@ public void evictGetOrder() { mEvictor.updateOnGet(mFirst); Assert.assertEquals(mFirst, mEvictor.evict()); mEvictor.updateOnGet(mSecond); - Assert.assertEquals(mSecond, mEvictor.evict()); + PageId evictedPage = mEvictor.evict(); + Assert.assertTrue(evictedPage.equals(mFirst) || evictedPage.equals(mSecond)); } @Test From 7ab2579ff2a9108fc11a938b10685779f1e3ee3f Mon Sep 17 00:00:00 2001 From: qian0817 Date: Wed, 8 Feb 2023 00:34:51 +0800 Subject: [PATCH 103/334] Merge delete requests to Optimize move directory perf on objectstore ### What changes are proposed in this pull request? Optimize rename directory performance on object store by merging delete object requests. ### Why are the changes needed? In the previous implementation, if there were 10,000 objects in a directory, then there were 10,000 API calls for copying and deleting object. this PR merges the API for deleting object so that there are only 10,000 API calls for copying and 10 API calls for deleting objects in bulk. In our performance tests, when moving a directory with 400,000 objects, we were able to achieve a 30% performance improvement. ### Does this PR introduce any user facing changes? No user facing changes. 
pr-link: Alluxio/alluxio#16527 change-id: cid-a0af2782b8d538daaee63a06b2e5c0a6108835be --- .../underfs/ObjectUnderFileSystem.java | 116 +++++++++--------- .../underfs/cos/COSUnderFileSystem.java | 21 ++++ .../underfs/obs/OBSUnderFileSystem.java | 21 ++++ .../underfs/oss/OSSUnderFileSystem.java | 14 +++ 4 files changed, 113 insertions(+), 59 deletions(-) diff --git a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java index c496cc72293e..456f088f8f3f 100755 --- a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java @@ -50,13 +50,11 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.function.Supplier; import javax.annotation.Nullable; -import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.ThreadSafe; /** @@ -105,7 +103,7 @@ protected ObjectUnderFileSystem(AlluxioURI uri, UnderFileSystemConfiguration ufs /** * Information about a single object in object UFS. */ - protected class ObjectStatus { + protected static class ObjectStatus { private static final long INVALID_CONTENT_LENGTH = -1L; private final String mContentHash; @@ -196,7 +194,7 @@ public interface ObjectListingChunk { /** * Permissions in object UFS. */ - public class ObjectPermissions { + public static class ObjectPermissions { final String mOwner; final String mGroup; final short mMode; @@ -238,16 +236,16 @@ public short getMode() { /** * Operations added to this buffer are performed concurrently. + * Note that {@link #getResult()} method blocks {@link #add(Object)} method. 
* - * @param T input type for operation + * @param input type for operation */ + @ThreadSafe protected abstract class OperationBuffer { - /** A list of inputs in batches to be operated on in parallel. */ - private ArrayList> mBatches; /** A list of the successful operations for each batch. */ - private ArrayList>> mBatchesResult; + private final ArrayList>> mBatchesResult; /** Buffer for a batch of inputs. */ - private List mCurrentBatchBuffer; + private final List mCurrentBatchBuffer; /** Total number of inputs to be operated on across batches. */ protected int mEntriesAdded; @@ -255,7 +253,6 @@ protected abstract class OperationBuffer { * Construct a new {@link OperationBuffer} instance. */ protected OperationBuffer() { - mBatches = new ArrayList<>(); mBatchesResult = new ArrayList<>(); mCurrentBatchBuffer = new ArrayList<>(); mEntriesAdded = 0; @@ -282,7 +279,7 @@ protected OperationBuffer() { * @param input the input to operate on * @throws IOException if a non-Alluxio error occurs */ - public void add(T input) throws IOException { + public synchronized void add(T input) throws IOException { if (mCurrentBatchBuffer.size() == getBatchSize()) { // Batch is full submitBatch(); @@ -297,7 +294,7 @@ public void add(T input) throws IOException { * @return a list of inputs for successful operations * @throws IOException if a non-Alluxio error occurs */ - public List getResult() throws IOException { + public synchronized List getResult() throws IOException { submitBatch(); List result = new ArrayList<>(); for (Future> list : mBatchesResult) { @@ -325,38 +322,16 @@ public List getResult() throws IOException { */ private void submitBatch() throws IOException { if (mCurrentBatchBuffer.size() != 0) { - int batchNumber = mBatches.size(); - mBatches.add(new ArrayList<>(mCurrentBatchBuffer)); + List batch = new ArrayList<>(mCurrentBatchBuffer); mCurrentBatchBuffer.clear(); - mBatchesResult.add(batchNumber, - mExecutorService.submit(new 
OperationThread(mBatches.get(batchNumber)))); - } - } - - /** - * Thread class to operate on a batch of objects. - */ - @NotThreadSafe - protected class OperationThread implements Callable> { - List mBatch; - - /** - * Operate on a batch of inputs. - * - * @param batch a list of inputs for the current batch - */ - public OperationThread(List batch) { - mBatch = batch; - } - - @Override - public List call() { - try { - return operate(mBatch); - } catch (IOException e) { - // Do not append to success list - return Collections.emptyList(); - } + mBatchesResult.add(mExecutorService.submit(() -> { + try { + return operate(batch); + } catch (IOException e) { + // Do not append to success list + return Collections.emptyList(); + } + })); } } } @@ -465,7 +440,7 @@ public boolean deleteExistingDirectory(String path, DeleteOptions options) throw /** * Object keys added to a {@link DeleteBuffer} will be deleted in batches. */ - @NotThreadSafe + @ThreadSafe protected class DeleteBuffer extends OperationBuffer { /** * Construct a new {@link DeleteBuffer} instance. 
@@ -665,31 +640,47 @@ public InputStream openExistingFile(String path, OpenOptions options) throws IOE @Override public boolean renameDirectory(String src, String dst) throws IOException { + if (exists(dst)) { + LOG.error("Unable to rename {} to {} because destination already exists.", src, dst); + return false; + } + // Use a global delete buffer, in order to merge delete object requests + DeleteBuffer deleteBuffer = new DeleteBuffer(); + boolean result = renameDirectoryInternal(src, dst, deleteBuffer); + int fileDeleted = deleteBuffer.getResult().size(); + if (fileDeleted != deleteBuffer.mEntriesAdded) { + LOG.warn("Failed to rename directory, successfully deleted {} files out of {}.", + fileDeleted, deleteBuffer.mEntriesAdded); + return false; + } + return result; + } + + private boolean renameDirectoryInternal(String src, String dst, DeleteBuffer deleteBuffer) + throws IOException { UfsStatus[] children = listInternal(src, ListOptions.defaults()); if (children == null) { LOG.error("Failed to list directory {}, aborting rename.", src); return false; } - if (exists(dst)) { - LOG.error("Unable to rename {} to {} because destination already exists.", src, dst); - return false; - } // Source exists and is a directory, and destination does not exist // Rename the source folder first - if (!copyObject(stripPrefixIfPresent(convertToFolderName(src)), - stripPrefixIfPresent(convertToFolderName(dst)))) { + String srcKey = stripPrefixIfPresent(convertToFolderName(src)); + if (!copyObject(srcKey, stripPrefixIfPresent(convertToFolderName(dst)))) { return false; } + deleteBuffer.add(srcKey); + // Rename each child in the src folder to destination/child // a. Since renames are a copy operation, files are added to a buffer and processed concurrently // b. 
Pseudo-directories are metadata only operations are not added to the buffer - RenameBuffer buffer = new RenameBuffer(); + RenameBuffer buffer = new RenameBuffer(deleteBuffer); for (UfsStatus child : children) { String childSrcPath = PathUtils.concatPath(src, child.getName()); String childDstPath = PathUtils.concatPath(dst, child.getName()); if (child.isDirectory()) { // Recursive call - if (!renameDirectory(childSrcPath, childDstPath)) { + if (!renameDirectoryInternal(childSrcPath, childDstPath, deleteBuffer)) { LOG.error("Failed to rename path {} to {}, aborting rename.", childSrcPath, childDstPath); return false; } @@ -704,8 +695,7 @@ public boolean renameDirectory(String src, String dst) throws IOException { filesRenamed, buffer.mEntriesAdded); return false; } - // Delete src and everything under src - return deleteDirectory(src, DeleteOptions.defaults().setRecursive(true)); + return true; } @Override @@ -717,12 +707,18 @@ public boolean renameRenamableDirectory(String src, String dst) throws IOExcepti /** * File paths added to a {@link RenameBuffer} will be renamed concurrently. */ - @NotThreadSafe + @ThreadSafe protected class RenameBuffer extends OperationBuffer> { + private final DeleteBuffer mDeleteBuffer; + /** * Construct a new {@link RenameBuffer} instance. 
+ * + * @param deleteBuffer delete object buffer */ - public RenameBuffer() {} + public RenameBuffer(DeleteBuffer deleteBuffer) { + mDeleteBuffer = deleteBuffer; + } @Override protected int getBatchSize() { @@ -734,7 +730,10 @@ protected List> operate(List> paths) throws IOException { List> succeeded = new ArrayList<>(); for (Pair pathPair : paths) { - if (renameFile(pathPair.getFirst(), pathPair.getSecond())) { + String src = stripPrefixIfPresent(pathPair.getFirst()); + String dst = stripPrefixIfPresent(pathPair.getSecond()); + if (copyObject(src, dst)) { + mDeleteBuffer.add(src); succeeded.add(pathPair); } } @@ -857,8 +856,7 @@ protected int getListingChunkLengthMax() { * @return length of each list request */ protected int getListingChunkLength(AlluxioConfiguration conf) { - return conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH) > getListingChunkLengthMax() - ? getListingChunkLengthMax() : conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH); + return Math.min(conf.getInt(PropertyKey.UNDERFS_LISTING_LENGTH), getListingChunkLengthMax()); } /** diff --git a/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java b/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java index 4847a2d8816f..14c965c0ef0b 100644 --- a/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java +++ b/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java @@ -29,6 +29,8 @@ import com.qcloud.cos.auth.COSCredentials; import com.qcloud.cos.exception.CosClientException; import com.qcloud.cos.model.COSObjectSummary; +import com.qcloud.cos.model.DeleteObjectsRequest; +import com.qcloud.cos.model.DeleteObjectsResult; import com.qcloud.cos.model.ListObjectsRequest; import com.qcloud.cos.model.ObjectListing; import com.qcloud.cos.model.ObjectMetadata; @@ -41,6 +43,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.List; +import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; /** @@ -163,6 
+166,24 @@ protected boolean deleteObject(String key) { return true; } + @Override + protected List deleteObjects(List keys) throws IOException { + try { + DeleteObjectsRequest request = new DeleteObjectsRequest(mBucketName); + List keyVersions = keys.stream() + .map(DeleteObjectsRequest.KeyVersion::new) + .collect(Collectors.toList()); + request.setKeys(keyVersions); + DeleteObjectsResult result = mClient.deleteObjects(request); + return result.getDeletedObjects() + .stream() + .map(DeleteObjectsResult.DeletedObject::getKey) + .collect(Collectors.toList()); + } catch (CosClientException e) { + throw new IOException("failed to delete objects", e); + } + } + @Override protected String getFolderSuffix() { return FOLDER_SUFFIX; diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java index 85b35528eabe..b402c9bb8134 100644 --- a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java @@ -30,6 +30,9 @@ import com.obs.services.ObsClient; import com.obs.services.exception.ObsException; import com.obs.services.model.AbortMultipartUploadRequest; +import com.obs.services.model.DeleteObjectsRequest; +import com.obs.services.model.DeleteObjectsResult; +import com.obs.services.model.KeyAndVersion; import com.obs.services.model.ListMultipartUploadsRequest; import com.obs.services.model.ListObjectsRequest; import com.obs.services.model.MultipartUpload; @@ -50,6 +53,7 @@ import java.util.List; import java.util.concurrent.ExecutorService; import java.util.function.Supplier; +import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; /** @@ -210,6 +214,23 @@ protected boolean deleteObject(String key) { return true; } + @Override + protected List deleteObjects(List keys) throws IOException { + KeyAndVersion[] kvs = keys.stream() + .map(KeyAndVersion::new) + 
.toArray(KeyAndVersion[]::new); + DeleteObjectsRequest request = new DeleteObjectsRequest(mBucketName, false, kvs); + try { + DeleteObjectsResult result = mClient.deleteObjects(request); + return result.getDeletedObjectResults() + .stream() + .map(DeleteObjectsResult.DeleteObjectResult::getObjectKey) + .collect(Collectors.toList()); + } catch (ObsException e) { + throw new IOException("Failed to delete objects", e); + } + } + @Override protected String getFolderSuffix() { return FOLDER_SUFFIX; diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java index a74820bb6462..c511620a33ad 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java @@ -29,6 +29,8 @@ import com.aliyun.oss.OSSClientBuilder; import com.aliyun.oss.ServiceException; import com.aliyun.oss.model.AbortMultipartUploadRequest; +import com.aliyun.oss.model.DeleteObjectsRequest; +import com.aliyun.oss.model.DeleteObjectsResult; import com.aliyun.oss.model.ListMultipartUploadsRequest; import com.aliyun.oss.model.ListObjectsRequest; import com.aliyun.oss.model.MultipartUpload; @@ -215,6 +217,18 @@ protected boolean deleteObject(String key) { return true; } + @Override + protected List deleteObjects(List keys) throws IOException { + try { + DeleteObjectsRequest request = new DeleteObjectsRequest(mBucketName); + request.setKeys(keys); + DeleteObjectsResult result = mClient.deleteObjects(request); + return result.getDeletedObjects(); + } catch (ServiceException e) { + throw new IOException("Failed to delete objects", e); + } + } + @Override protected String getFolderSuffix() { return FOLDER_SUFFIX; From bac52c429262edd87d1385912728fbced32e3660 Mon Sep 17 00:00:00 2001 From: Huang Hua Date: Thu, 9 Feb 2023 06:12:58 +0800 Subject: [PATCH 104/334] Reduce log level to debug for Page Not Found Page Not Found is 
reported on the first time to access such page, or when the page cache is evicted. This is normal and not an error. Signed-off-by: Huang Hua ### What changes are proposed in this pull request? Change the log level from error to debug. ### Why are the changes needed? When client reads some file for the first time, worker has the following error messages: ``` 2023-02-08 15:07:43,839 ERROR LocalCacheManager - Failed to get existing page PageId{FileId=1478269e1f194060bc522a21dce77eeee147367afad6d4011ae65543e9b0256b, PageIndex=0} from pageStore alluxio.exception.PageNotFoundException: /Volumes/ramdisk/LOCAL/1048576/221/1478269e1f194060bc522a21dce77eeee147367afad6d4011ae65543e9b0256b/0 at alluxio.client.file.cache.store.LocalPageStore.get(LocalPageStore.java:118) at alluxio.client.file.cache.LocalCacheManager.getPage(LocalCacheManager.java:703) at alluxio.client.file.cache.LocalCacheManager.get(LocalCacheManager.java:483) at alluxio.client.file.cache.NoExceptionCacheManager.get(NoExceptionCacheManager.java:105) at alluxio.worker.dora.PagedFileReader.read(PagedFileReader.java:117) at alluxio.worker.dora.PagedFileReader.transferTo(PagedFileReader.java:177) at alluxio.worker.grpc.FileReadHandler$DataReader.getDataBuffer(FileReadHandler.java:472) at alluxio.worker.grpc.FileReadHandler$DataReader.runInternal(FileReadHandler.java:335) at alluxio.worker.grpc.FileReadHandler$DataReader.run(FileReadHandler.java:284) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266) at java.util.concurrent.FutureTask.run(FutureTask.java) at alluxio.worker.grpc.GrpcExecutors$ImpersonateThreadPoolExecutor.lambda$execute$0(GrpcExecutors.java:159) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) ``` This is not an error. This is a normal case. 
If this happens when some cache is evicted/removed, the worker/client will re-fetch the data. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16856 change-id: cid-64eb4cdbfc35f9a3d6d3700075e0181fb02929ae --- .../main/java/alluxio/client/file/cache/LocalCacheManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java index d9c73b3c2fa5..f3edb5db43bc 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheManager.java @@ -717,7 +717,7 @@ private int getPage(PageInfo pageInfo, int pageOffset, int bytesToRead, return -1; } } catch (IOException | PageNotFoundException e) { - LOG.error("Failed to get existing page {} from pageStore", pageInfo.getPageId(), e); + LOG.debug("Failed to get existing page {} from pageStore", pageInfo.getPageId(), e); return -1; } return bytesToRead; From 778f9ec7ec8a30a55e1002b8da858967760ab266 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 10 Feb 2023 09:43:54 +0800 Subject: [PATCH 105/334] Support rm directory and reset DirectChildrenLoadedState ### What changes are proposed in this pull request? Support deleting a file from Alluxio only and resetting the directChildrenLoadedState of its parent. ### Why are the changes needed? ### Does this PR introduce any user facing changes?
```Console $ bin/alluxio fs rm -R --alluxioOnly --syncParentNextTime=true /test/bb $ bin/alluxio fs ls /test/ drwxr-xr-x mbl staff 0 PERSISTED 05-30-2022 14:45:24:142 DIR /test/bb $ bin/alluxio fs rm -R --alluxioOnly --syncParentNextTime=false /test/bb $ bin/alluxio fs ls /test/ # /test/bb will not be loaded because metadata sync on /test is disabled ``` pr-link: Alluxio/alluxio#15647 change-id: cid-95acf6dd9c2ee820821e382a2a7332486c865caa --- .../master/file/DefaultFileSystemMaster.java | 6 +++++ .../main/proto/grpc/file_system_master.proto | 1 + core/transport/src/main/proto/proto.lock | 5 ++++ .../alluxio/cli/fs/command/RmCommand.java | 14 +++++++++- .../cli/fs/AbstractFileSystemShellTest.java | 19 ++++++++++++++ .../fs/command/RmCommandIntegrationTest.java | 26 +++++++++++++++++++ 6 files changed, 70 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index ef392be52bca..cf9639049abd 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -2089,6 +2089,12 @@ public void delete(AlluxioURI path, DeleteContext context) } deleteInternal(rpcContext, inodePath, context, false); + if (context.getOptions().getAlluxioOnly() + && context.getOptions().hasSyncParentNextTime()) { + boolean syncParentNextTime = context.getOptions().getSyncParentNextTime(); + mInodeTree.setDirectChildrenLoaded( + rpcContext, inodePath.getParentInodeDirectory(), !syncParentNextTime); + } auditContext.setSucceeded(true); cacheOperation(context); } diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 2f3f267c6020..6895c3151ebd 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ 
b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -150,6 +150,7 @@ message DeletePOptions { optional bool alluxioOnly = 2; optional bool unchecked = 3; optional FileSystemMasterCommonPOptions commonOptions = 4; + optional bool syncParentNextTime = 5; optional bool deleteMountPoint = 6; } message DeletePRequest { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index f3d8d08b43a2..ca2600f0a076 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2458,6 +2458,11 @@ "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" }, + { + "id": 5, + "name": "syncParentNextTime", + "type": "bool" + }, { "id": 6, "name": "deleteMountPoint", diff --git a/shell/src/main/java/alluxio/cli/fs/command/RmCommand.java b/shell/src/main/java/alluxio/cli/fs/command/RmCommand.java index 44b32bebb2f6..374d3d6fcb84 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/RmCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/RmCommand.java @@ -70,6 +70,14 @@ public final class RmCommand extends AbstractFileSystemCommand { .hasArg(false) .desc("remove mount points in the directory") .build(); + private static final Option SYNC_PARENT_NEXT_TIME = + Option.builder("s") + .longOpt("syncParentNextTime") + .required(false) + .hasArg(true) + .desc("Marks a directory to either trigger a metadata sync or skip the " + + "metadata sync on next access.") + .build(); /** * @param fsContext the filesystem of Alluxio @@ -90,7 +98,8 @@ public Options getOptions() { .addOption(RECURSIVE_ALIAS_OPTION) .addOption(REMOVE_UNCHECKED_OPTION) .addOption(REMOVE_ALLUXIO_ONLY) - .addOption(DELETE_MOUNT_POINT); + .addOption(DELETE_MOUNT_POINT) + .addOption(SYNC_PARENT_NEXT_TIME); } @Override @@ -111,6 +120,9 @@ protected void runPlainPath(AlluxioURI path, CommandLine cl) DeletePOptions options = DeletePOptions.newBuilder().setRecursive(recursive).setAlluxioOnly(isAlluxioOnly) 
.setDeleteMountPoint(isDeleteMountPoint) + .setSyncParentNextTime( + cl.hasOption(SYNC_PARENT_NEXT_TIME.getLongOpt()) + && Boolean.parseBoolean(cl.getOptionValue(SYNC_PARENT_NEXT_TIME.getLongOpt()))) .setUnchecked(cl.hasOption(REMOVE_UNCHECKED_OPTION_CHAR)).build(); mFileSystem.delete(path, options); diff --git a/tests/src/test/java/alluxio/client/cli/fs/AbstractFileSystemShellTest.java b/tests/src/test/java/alluxio/client/cli/fs/AbstractFileSystemShellTest.java index 4ddeecbac87b..3be051accae4 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/AbstractFileSystemShellTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/AbstractFileSystemShellTest.java @@ -23,6 +23,9 @@ import alluxio.client.file.FileSystemTestUtils; import alluxio.conf.Configuration; import alluxio.exception.AlluxioException; +import alluxio.grpc.ExistsPOptions; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.LoadMetadataPType; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.ReadPType; import alluxio.grpc.WritePType; @@ -300,6 +303,22 @@ protected boolean fileExists(AlluxioURI path) { } } + /** + * @param path a file path + * @return whether the file exists in Alluxio + */ + protected boolean fileExistsInAlluxio(AlluxioURI path) { + try { + return sFileSystem.exists(path, + ExistsPOptions.newBuilder().setLoadMetadataType(LoadMetadataPType.NEVER).setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build()).build()); + } catch (IOException e) { + return false; + } catch (AlluxioException e) { + return false; + } + } + /** * Checks whether the given file is actually persisted by freeing it, then * reading it and comparing it against the expected byte array. 
diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/RmCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/RmCommandIntegrationTest.java index a1a977aaa8d9..8e421677c9b0 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/command/RmCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/command/RmCommandIntegrationTest.java @@ -130,4 +130,30 @@ public void rmrWildCard() throws IOException, AlluxioException { Assert.assertFalse(fileExists(new AlluxioURI(testDir + "/foo"))); Assert.assertFalse(fileExists(new AlluxioURI(testDir + "/foobar4"))); } + + @Test + public void rmSyncDirNextTime() { + StringBuilder toCompare = new StringBuilder(); + sFsShell.run("mkdir", "/testFolder1/testFolder2"); + toCompare.append(getCommandOutput(new String[] {"mkdir", "/testFolder1/testFolder2"})); + sFsShell.run("touch", "/testFolder1/testFolder2/testFile2"); + toCompare + .append(getCommandOutput(new String[] {"touch", "/testFolder1/testFolder2/testFile2"})); + AlluxioURI testFolder1 = new AlluxioURI("/testFolder1"); + AlluxioURI testFolder2 = new AlluxioURI("/testFolder1/testFolder2"); + AlluxioURI testFile2 = new AlluxioURI("/testFolder1/testFolder2/testFile2"); + Assert.assertTrue(fileExists(testFolder1)); + Assert.assertTrue(fileExists(testFolder2)); + Assert.assertTrue(fileExists(testFile2)); + sFsShell.run("rm", "--alluxioOnly", "-s", "true", "/testFolder1/testFolder2/testFile2"); + toCompare.append(getCommandOutput(new String[] {"rm", "/testFolder1/testFolder2/testFile2"}) + .replace("\n", "") + + " only from Alluxio space\n"); + Assert.assertEquals(toCompare.toString(), mOutput.toString()); + Assert.assertTrue(fileExists(testFolder1)); + Assert.assertTrue(fileExists(testFolder2)); + Assert.assertFalse(fileExistsInAlluxio(testFile2)); + Assert.assertTrue(fileExists(testFile2)); + Assert.assertTrue(fileExistsInAlluxio(testFile2)); + } } From f3eca21eea7b1af3813b2976d3eeed492f6869e2 Mon Sep 17 00:00:00 2001 From: 
lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Thu, 9 Feb 2023 18:13:36 -0800 Subject: [PATCH 106/334] Introduce a rearchitectured S3 proxy service ### What changes are proposed in this pull request? Rearchitecturing the s3 service ### Why are the changes needed? Rearchitecturing the s3 service ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16654 change-id: cid-2b39b3f50b165cad6aece868f75d8e6218de0cfe --- .../main/java/alluxio/conf/PropertyKey.java | 21 + .../src/main/java/alluxio/web/WebServer.java | 4 + .../s3/CompleteMultipartUploadHandler.java | 3 +- .../alluxio/proxy/s3/RangeFileInStream.java | 14 + .../java/alluxio/proxy/s3/S3BaseTask.java | 108 ++ .../java/alluxio/proxy/s3/S3BucketTask.java | 584 +++++++ .../java/alluxio/proxy/s3/S3Constants.java | 1 + .../main/java/alluxio/proxy/s3/S3Handler.java | 516 ++++++ .../java/alluxio/proxy/s3/S3ObjectTask.java | 1413 +++++++++++++++++ .../alluxio/proxy/s3/S3RequestServlet.java | 127 ++ .../java/alluxio/proxy/s3/S3RestUtils.java | 84 +- .../s3/signature/AwsSignatureProcessor.java | 40 +- .../main/java/alluxio/web/ProxyWebServer.java | 81 +- 13 files changed, 2974 insertions(+), 22 deletions(-) create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index cfb622902ad3..316e279c5a56 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5249,6 +5249,23 @@ public String toString() { 
.setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_V2_VERSION_ENABLED = + booleanBuilder(Name.PROXY_S3_V2_VERSION_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) V2, an optimized version of " + + "Alluxio s3 proxy service.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = + booleanBuilder(Name.PROXY_S3_V2_ASYNC_PROCESSING_ENABLED) + .setDefaultValue(false) + .setDescription("(Experimental) If enabled, handle S3 request " + + "in async mode when v2 version of Alluxio s3 " + + "proxy service is enabled.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey PROXY_STREAM_CACHE_TIMEOUT_MS = durationBuilder(Name.PROXY_STREAM_CACHE_TIMEOUT_MS) .setAlias("alluxio.proxy.stream.cache.timeout.ms") @@ -8395,6 +8412,10 @@ public static final class Name { public static final String PROXY_WEB_PORT = "alluxio.proxy.web.port"; public static final String PROXY_AUDIT_LOGGING_ENABLED = "alluxio.proxy.audit.logging.enabled"; + public static final String PROXY_S3_V2_VERSION_ENABLED = + "alluxio.proxy.s3.v2.version.enabled"; + public static final String PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = + "alluxio.proxy.s3.v2.async.processing.enabled"; public static final String S3_UPLOADS_ID_XATTR_KEY = "s3_uploads_mulitpartupload_id"; // diff --git a/core/server/common/src/main/java/alluxio/web/WebServer.java b/core/server/common/src/main/java/alluxio/web/WebServer.java index 971352fb53d1..c7dd6f221ecf 100644 --- a/core/server/common/src/main/java/alluxio/web/WebServer.java +++ b/core/server/common/src/main/java/alluxio/web/WebServer.java @@ -62,6 +62,10 @@ public abstract class WebServer { private final PrometheusMetricsServlet mPMetricsServlet = new PrometheusMetricsServlet( 
MetricsSystem.METRIC_REGISTRY); + protected ServerConnector getServerConnector() { + return mServerConnector; + } + /** * Creates a new instance of {@link WebServer}. It pairs URLs with servlets and sets the webapp * folder. diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java index 2a849413c477..05f3bcf7edfb 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java @@ -192,7 +192,8 @@ public void handle(String s, Request request, HttpServletRequest httpServletRequ throw e; } finally { if (stopwatch != null) { - ProxyWebServer.logAccess(httpServletRequest, httpServletResponse, stopwatch); + ProxyWebServer.logAccess(httpServletRequest, httpServletResponse, + stopwatch, S3BaseTask.OpType.CompleteMultipartUpload); } } } diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java index 1df6b1f64eef..ffb528895648 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/RangeFileInStream.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; /** * This class is use {@link FileInStream} underlying, and implement range read. @@ -65,6 +66,19 @@ public int read(byte[] b, int off, int len) throws IOException { return n; } + /** + * Reads up to len bytes of data from the input stream into the byte buffer. 
+ * @param byteBuffer the buffer into which the data is read + * @param off the start offset in the buffer at which the data is written + * @param len the maximum number of bytes to read + * @return the total number of bytes read into the buffer, or -1 if there is no more + * data because the end of the stream has been reached + * @throws IOException + */ + public int read(ByteBuffer byteBuffer, int off, int len) throws IOException { + return this.mUnderlyingStream.read(byteBuffer, off, len); + } + @Override public void close() throws IOException { mUnderlyingStream.close(); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java new file mode 100644 index 000000000000..9a8a9004ffe3 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BaseTask.java @@ -0,0 +1,108 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import javax.ws.rs.core.Response; + +/** + * S3 Abstract Base task for handling S3 API logic. + */ +public abstract class S3BaseTask { + + protected S3Handler mHandler; + protected OpType mOPType; + + /** + * Instantiate a S3BaseTask. + * + * @param handler S3Handler object + * @param opType the enum indicate the S3 API name + */ + public S3BaseTask(S3Handler handler, OpType opType) { + mHandler = handler; + mOPType = opType; + } + + /** + * Return the OpType (S3 API enum). 
+ * + * @return OpType (S3 API enum) + */ + public OpType getOPType() { + return mOPType; + } + + /** + * Run core S3 API logic from different S3 task. + * + * @return Response object containing common HTTP response properties + */ + public abstract Response continueTask(); + + /** + * Run S3 API logic in a customized async way, e.g. delegate the + * core API logic to another thread and do something while waiting. + */ + public void handleTaskAsync() { + } + + /** + * Enum for tagging the http request to target for + * different threadpools for handling. + */ + public enum OpTag { + LIGHT, HEAVY + } + + /** + * Enum indicating name of S3 API handling per http request. + */ + public enum OpType { + + // Object Task + ListParts(OpTag.LIGHT), + GetObjectTagging(OpTag.LIGHT), + PutObjectTagging(OpTag.LIGHT), + DeleteObjectTagging(OpTag.LIGHT), + GetObject(OpTag.HEAVY), PutObject(OpTag.HEAVY), + CopyObject(OpTag.HEAVY), DeleteObject(OpTag.LIGHT), + HeadObject(OpTag.LIGHT), UploadPart(OpTag.LIGHT), + UploadPartCopy(OpTag.HEAVY), + CreateMultipartUpload(OpTag.LIGHT), + AbortMultipartUpload(OpTag.LIGHT), + CompleteMultipartUpload(OpTag.HEAVY), + + // Bucket Task + ListBuckets(OpTag.LIGHT), + ListMultipartUploads(OpTag.LIGHT), + GetBucketTagging(OpTag.LIGHT), + PutBucketTagging(OpTag.LIGHT), + DeleteBucketTagging(OpTag.LIGHT), + CreateBucket(OpTag.LIGHT), + ListObjects(OpTag.LIGHT), // as well as ListObjectsV2 + DeleteObjects(OpTag.LIGHT), + HeadBucket(OpTag.LIGHT), + DeleteBucket(OpTag.LIGHT), + Unsupported(OpTag.LIGHT), + Unknown(OpTag.LIGHT); + + private final OpTag mOpTag; + + OpType(OpTag opTag) { + mOpTag = opTag; + } + + OpTag getOpTag() { + return mOpTag; + } + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java new file mode 100644 index 000000000000..9e13e22bcffc --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java @@ -0,0 +1,584 
@@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.AlluxioException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.SetAttributePOptions; +import alluxio.proto.journal.File; + +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.google.common.base.Preconditions; +import com.google.common.net.InetAddresses; +import com.google.protobuf.ByteString; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.stream.Collectors; +import javax.ws.rs.core.Response; + +/** + * S3 Tasks to handle bucket level or global level request. 
+ * (only bucket name or no bucket name is provided) + */ +public class S3BucketTask extends S3BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(S3BucketTask.class); + + protected S3BucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + throw new S3Exception(S3ErrorCode.NOT_IMPLEMENTED); + }); + } + + /** + * Factory for getting a S3BucketTask type task. + */ + public static final class Factory { + /** + * Marshall the request and create corresponding bucket level S3 task. + * @param handler + * @return S3BucketTask + */ + public static S3BucketTask create(S3Handler handler) { + switch (handler.getHTTPVerb()) { + case "GET": + if (StringUtils.isEmpty(handler.getBucket())) { + return new ListBucketsTask(handler, OpType.ListBuckets); + } else if (handler.getQueryParameter("tagging") != null) { + return new GetBucketTaggingTask(handler, OpType.GetBucketTagging); + } else if (handler.getQueryParameter("uploads") != null) { + return new ListMultipartUploadsTask(handler, OpType.ListMultipartUploads); + } else { + return new ListObjectsTask(handler, OpType.ListObjects); + } + case "PUT": + if (handler.getQueryParameter("tagging") != null) { + return new PutBucketTaggingTask(handler, OpType.PutBucketTagging); + } else { + return new CreateBucketTask(handler, OpType.CreateBucket); + } + case "POST": + if (handler.getQueryParameter("delete") != null) { + return new DeleteObjectsTask(handler, OpType.DeleteObjects); + } + break; + case "HEAD": + if (!StringUtils.isEmpty(handler.getBucket())) { + return new HeadBucketTask(handler, OpType.HeadBucket); + } + break; + case "DELETE": + if (handler.getQueryParameter("tagging") != null) { + return new DeleteBucketTaggingTask(handler, OpType.DeleteBucketTagging); + } else { + return new DeleteBucketTask(handler, OpType.DeleteBucket); + } + default: + break; + } + return new 
S3BucketTask(handler, OpType.Unsupported); + } + } + + private static class ListBucketsTask extends S3BucketTask { + protected ListBucketsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(S3Constants.EMPTY, () -> { + final String user = mHandler.getUser(); + + List objects = new ArrayList<>(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, null, null)) { + try { + objects = mHandler.getMetaFS().listStatus(new AlluxioURI("/")); + } catch (AlluxioException | IOException e) { + if (e instanceof AccessControlException) { + auditContext.setAllowed(false); + } + auditContext.setSucceeded(false); + throw S3RestUtils.toBucketS3Exception(e, "/"); + } + + final List buckets = objects.stream() + .filter((uri) -> uri.getOwner().equals(user)) + // debatable (?) potentially breaks backcompat(?) + .filter(URIStatus::isFolder) + .collect(Collectors.toList()); + return new ListAllMyBucketsResult(buckets); + } + }); + } + } // end of ListBucketsTask + + private static class GetBucketTaggingTask extends S3BucketTask { + protected GetBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser( + mHandler.getUser(), mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + AlluxioURI uri = new AlluxioURI(path); + try { + TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); + LOG.debug("GetBucketTagging tagData={}", tagData); + return tagData != null ? 
tagData : new TaggingData(); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, mHandler.getBucket(), auditContext); + } + } + }); + } + } // end of GetBucketTaggingTask + + private static class ListMultipartUploadsTask extends S3BucketTask { + + protected ListMultipartUploadsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String bucket = mHandler.getBucket(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + try { + List children = mHandler.getMetaFS().listStatus(new AlluxioURI( + S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR)); + final List uploadIds = children.stream() + .filter((uri) -> uri.getOwner().equals(user)) + .collect(Collectors.toList()); + return ListMultipartUploadsResult.buildFromStatuses(bucket, uploadIds); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucket, auditContext); + } + } + }); + } + } // end of ListMultipartUploadsTask + + private static class ListObjectsTask extends S3BucketTask { + protected ListObjectsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + private String normalizeS3Prefix(String prefix, char delimiter) { + if (prefix != null) { + int pos = prefix.lastIndexOf(delimiter); + if (pos >= 0) { + return prefix.substring(0, pos + 1); + } + } + return S3Constants.EMPTY; + } + + private String parsePathWithDelimiter(String bucketPath, String prefix, String delimiter) + throws S3Exception { + // TODO(czhu): allow non-"/" 
delimiters + // Alluxio only support use / as delimiter + if (!delimiter.equals(AlluxioURI.SEPARATOR)) { + throw new S3Exception(bucketPath, new S3ErrorCode( + S3ErrorCode.PRECONDITION_FAILED.getCode(), + "Alluxio S3 API only support / as delimiter.", + S3ErrorCode.PRECONDITION_FAILED.getStatus())); + } + char delim = AlluxioURI.SEPARATOR.charAt(0); + String normalizedBucket = + bucketPath.replace(S3Constants.BUCKET_SEPARATOR, AlluxioURI.SEPARATOR); + String normalizedPrefix = normalizeS3Prefix(prefix, delim); + + if (!normalizedPrefix.isEmpty() && !normalizedPrefix.startsWith(AlluxioURI.SEPARATOR)) { + normalizedPrefix = AlluxioURI.SEPARATOR + normalizedPrefix; + } + return normalizedBucket + normalizedPrefix; + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + String path = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + String markerParam = mHandler.getQueryParameter("marker"); + String maxKeysParam = mHandler.getQueryParameter("max-keys"); + String prefixParam = mHandler.getQueryParameter("prefix"); + String delimiterParam = mHandler.getQueryParameter("delimiter"); + String encodingTypeParam = mHandler.getQueryParameter("encoding-type"); + String listTypeParam = mHandler.getQueryParameter("list-type"); + String continuationTokenParam = mHandler.getQueryParameter("continuation-token"); + String startAfterParam = mHandler.getQueryParameter("start-after"); + + int maxKeys = maxKeysParam == null + ? ListBucketOptions.DEFAULT_MAX_KEYS : Integer.parseInt(maxKeysParam); + Integer listType = listTypeParam == null ? 
null : Integer.parseInt(listTypeParam); + ListBucketOptions listBucketOptions = ListBucketOptions.defaults() + .setMarker(markerParam) + .setPrefix(prefixParam) + .setMaxKeys(maxKeys) + .setDelimiter(delimiterParam) + .setEncodingType(encodingTypeParam) + .setListType(listType) + .setContinuationToken(continuationTokenParam) + .setStartAfter(startAfterParam); + + List children; + try { + // TODO(czhu): allow non-"/" delimiters by parsing the prefix & delimiter pair to + // determine what directory to list the contents of + // only list the direct children if delimiter is not null + if (StringUtils.isNotEmpty(delimiterParam)) { + if (prefixParam == null) { + path = parsePathWithDelimiter(path, S3Constants.EMPTY, delimiterParam); + } else { + path = parsePathWithDelimiter(path, prefixParam, delimiterParam); + } + children = userFs.listStatus(new AlluxioURI(path)); + } else { + if (prefixParam != null) { + path = parsePathWithDelimiter(path, prefixParam, AlluxioURI.SEPARATOR); + } + ListStatusPOptions options = ListStatusPOptions.newBuilder() + .setRecursive(true).build(); + children = userFs.listStatus(new AlluxioURI(path), options); + } + } catch (FileDoesNotExistException e) { + // Since we've called S3RestUtils.checkPathIsAlluxioDirectory() on the bucket path + // already, this indicates that the prefix was unable to be found in the Alluxio FS + children = new ArrayList<>(); + } catch (IOException | AlluxioException e) { + auditContext.setSucceeded(false); + throw S3RestUtils.toBucketS3Exception(e, mHandler.getBucket()); + } + return new ListBucketResult( + mHandler.getBucket(), + children, + listBucketOptions); + } // end try-with-resources block + }); + } + } // end of ListObjectsTask + + private static class PutBucketTaggingTask extends S3BucketTask { + + protected PutBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final 
String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), mHandler.getUser(), mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(mHandler.getMetaFS(), bucketPath, auditContext); + try { + TaggingData tagData = new XmlMapper().readerFor(TaggingData.class) + .readValue(mHandler.getInputStream()); + LOG.debug("PutBucketTagging tagData={}", tagData); + Map xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.UNION_REPLACE) + .build(); + userFs.setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (IOException e) { + if (e.getCause() instanceof S3Exception) { + throw S3RestUtils.toBucketS3Exception((S3Exception) e.getCause(), bucketPath, + auditContext); + } + auditContext.setSucceeded(false); + throw new S3Exception(e, bucketPath, S3ErrorCode.MALFORMED_XML); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.OK; + } + }); + } + } // end of PutBucketTaggingTask + + private static class CreateBucketTask extends S3BucketTask { + protected CreateBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), 
user, mHandler.getBucket(), null)) { + if (S3Handler.BUCKET_NAMING_RESTRICTION_ENABLED) { + Matcher m = S3Handler.BUCKET_ADJACENT_DOTS_DASHES_PATTERN.matcher(mHandler.getBucket()); + while (m.find()) { + if (!m.group().equals("--")) { + auditContext.setSucceeded(false); + throw new S3Exception(mHandler.getBucket(), S3ErrorCode.INVALID_BUCKET_NAME); + } + } + if (!S3Handler.BUCKET_VALID_NAME_PATTERN.matcher(mHandler.getBucket()).matches() + || S3Handler.BUCKET_INVALIDATION_PREFIX_PATTERN.matcher(mHandler.getBucket()) + .matches() + || S3Handler.BUCKET_INVALID_SUFFIX_PATTERN.matcher(mHandler.getBucket()).matches() + || InetAddresses.isInetAddress(mHandler.getBucket())) { + auditContext.setSucceeded(false); + throw new S3Exception(mHandler.getBucket(), S3ErrorCode.INVALID_BUCKET_NAME); + } + } + try { + URIStatus status = mHandler.getMetaFS().getStatus(new AlluxioURI(bucketPath)); + if (status.isFolder()) { + if (status.getOwner().equals(user)) { + // Silently swallow CreateBucket calls on existing buckets for this user + // - S3 clients may prepend PutObject requests with CreateBucket calls instead of + // calling HeadBucket to ensure that the bucket exists + return Response.Status.OK; + } + // Otherwise, this bucket is owned by a different user + throw new S3Exception(S3ErrorCode.BUCKET_ALREADY_EXISTS); + } + // Otherwise, that path exists in Alluxio but is not a directory + auditContext.setSucceeded(false); + throw new InvalidPathException("A file already exists at bucket path " + bucketPath); + } catch (FileDoesNotExistException e) { + // do nothing, we will create the directory below + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + + // These permission bits will be inherited by all objects/folders created within + // the bucket; we don't support custom bucket/object ACLs at the moment + CreateDirectoryPOptions options = + CreateDirectoryPOptions.newBuilder() + .setMode(PMode.newBuilder() + 
.setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE)) + .setWriteType(S3RestUtils.getS3WriteType()) + .build(); + try { + mHandler.getMetaFS().createDirectory(new AlluxioURI(bucketPath), options); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .setOwner(user) + .build(); + mHandler.getMetaFS().setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.OK; + } + }); + } + } // end of CreateBucketTask + + private static class DeleteObjectsTask extends S3BucketTask { + + protected DeleteObjectsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + try { + DeleteObjectsRequest request = new XmlMapper().readerFor(DeleteObjectsRequest.class) + .readValue(mHandler.getInputStream()); + List objs = request.getToDelete(); + List success = new ArrayList<>(); + List errored = new ArrayList<>(); + objs.sort(Comparator.comparingInt(x -> -1 * x.getKey().length())); + objs.forEach(obj -> { + try { + AlluxioURI uri = new AlluxioURI(bucketPath + + AlluxioURI.SEPARATOR + obj.getKey()); + DeletePOptions options = DeletePOptions.newBuilder().build(); + userFs.delete(uri, options); + DeleteObjectsResult.DeletedObject del = new DeleteObjectsResult.DeletedObject(); + del.setKey(obj.getKey()); + success.add(del); + } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { + /* + FDNE - delete on FDNE should be counted as a success, as there's nothing to do + 
DNE - s3 has no concept dirs - if it _is_ a dir, nothing to delete. + */ + DeleteObjectsResult.DeletedObject del = new DeleteObjectsResult.DeletedObject(); + del.setKey(obj.getKey()); + success.add(del); + } catch (IOException | AlluxioException e) { + DeleteObjectsResult.ErrorObject err = new DeleteObjectsResult.ErrorObject(); + err.setKey(obj.getKey()); + err.setMessage(e.getMessage()); + errored.add(err); + } + }); + + DeleteObjectsResult result = new DeleteObjectsResult(); + if (!request.getQuiet()) { + result.setDeleted(success); + } + result.setErrored(errored); + return result; + } catch (IOException e) { + LOG.debug("Failed to parse DeleteObjects request:", e); + auditContext.setSucceeded(false); + return Response.Status.BAD_REQUEST; + } + } + }); + } + } // end of DeleteObjectsTask + + private static class HeadBucketTask extends S3BucketTask { + protected HeadBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + } + return Response.ok().build(); + }); + } + } // end of HeadBucketTask + + private static class DeleteBucketTaggingTask extends S3BucketTask { + + protected DeleteBucketTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath 
= S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + + LOG.debug("DeleteBucketTagging bucket={}", bucketPath); + Map xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, ByteString.copyFrom(new byte[0])); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.DELETE_KEYS) + .build(); + try { + userFs.setAttribute(new AlluxioURI(bucketPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.NO_CONTENT; + } + }); + } + } // end of DeleteBucketTaggingTask + + private static class DeleteBucketTask extends S3BucketTask { + + protected DeleteBucketTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), null)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + // Delete the bucket. 
+ DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration + .get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + .build(); + try { + userFs.delete(new AlluxioURI(bucketPath), options); + } catch (Exception e) { + throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); + } + return Response.Status.NO_CONTENT; + } + }); + } + } // end of DeleteBucketTask +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java index 1dda55744f77..11cfc9695d1e 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java @@ -28,6 +28,7 @@ public final class S3Constants { * mount:point:bucket represents Alluxio directory /mount/point/bucket. */ public static final String BUCKET_SEPARATOR = ":"; + public static final String EMPTY = ""; /* Headers */ // standard headers diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java new file mode 100644 index 000000000000..46eb1e82226f --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java @@ -0,0 +1,516 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileSystem; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.XAttrPropagationStrategy; +import alluxio.master.audit.AsyncUserAccessAuditLogWriter; +import alluxio.util.CommonUtils; +import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; + +import com.google.common.base.Stopwatch; +import org.eclipse.jetty.server.Request; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URLDecoder; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.annotation.Nullable; +import javax.servlet.ServletContext; +import javax.servlet.ServletInputStream; +import javax.servlet.ServletOutputStream; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.Response; + +/** + * + */ +public class S3Handler { + public static final boolean BUCKET_NAMING_RESTRICTION_ENABLED = + Configuration.getBoolean(PropertyKey.PROXY_S3_BUCKET_NAMING_RESTRICTIONS_ENABLED); + public static final int MAX_HEADER_METADATA_SIZE = + (int) Configuration.getBytes(PropertyKey.PROXY_S3_METADATA_HEADER_MAX_SIZE); + public static final boolean MULTIPART_CLEANER_ENABLED = + Configuration.getBoolean(PropertyKey.PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED); + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html + // - Undocumented edge-case, no adjacent periods with hyphens, i.e: '.-' or '-.' 
+ public static final Pattern BUCKET_ADJACENT_DOTS_DASHES_PATTERN = Pattern.compile("([-\\.]{2})"); + public static final Pattern BUCKET_INVALIDATION_PREFIX_PATTERN = Pattern.compile("^xn--.*"); + public static final Pattern BUCKET_INVALID_SUFFIX_PATTERN = Pattern.compile(".*-s3alias$"); + public static final Pattern BUCKET_VALID_NAME_PATTERN = + Pattern.compile("[a-z0-9][a-z0-9\\.-]{1,61}[a-z0-9]"); + public static final Pattern BUCKET_PATH_PATTERN = + Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*$"); + public static final Pattern OBJECT_PATH_PATTERN = + Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*/.*$"); + private static final Logger LOG = LoggerFactory.getLogger(S3Handler.class); + private static final ThreadLocal TLS_BYTES = + ThreadLocal.withInitial(() -> new byte[8 * 1024]); + private final String mBucket; + private final String mObject; + private final HttpServletRequest mServletRequest; + private final HttpServletResponse mServletResponse; + private final ServletContext mServletContext; + public AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + String[] mUnsupportedSubResources = {"acl", "policy", "versioning", "cors", + "encryption", "intelligent-tiering", "inventory", "lifecycle", + "metrics", "ownershipControls", "replication", "website", "accelerate", + "location", "logging", "metrics", "notification", "ownershipControls", + "policyStatus", "requestPayment", "attributes", "legal-hold", "object-lock", + "retention", "torrent", "publicAccessBlock", "restore", "select"}; + Set mUnsupportedSubResourcesSet = new HashSet<>(Arrays.asList(mUnsupportedSubResources)); + Map mAmzHeaderMap = new HashMap<>(); + Request mBaseRequest; + private Stopwatch mStopwatch; + private String mUser; + private S3BaseTask mS3Task; + private FileSystem mMetaFS; + + /** + * S3Handler Constructor. 
+ * @param bucket + * @param object + * @param request + * @param response + */ + public S3Handler(String bucket, String object, + HttpServletRequest request, HttpServletResponse response) { + mBucket = bucket; + mObject = object; + mServletRequest = request; + mServletResponse = response; + mServletContext = request.getServletContext(); + } + + /** + * Create a S3Handler based on the incoming Request. + * @param path + * @param request + * @param response + * @return A S3Handler + * @throws Exception + * + */ + public static S3Handler createHandler(String path, + HttpServletRequest request, + HttpServletResponse response) throws Exception { + Stopwatch stopwatch = Stopwatch.createStarted(); + Matcher bucketMatcher = BUCKET_PATH_PATTERN.matcher(path); + Matcher objectMatcher = OBJECT_PATH_PATTERN.matcher(path); + + String pathStr = path; + String bucket = null; + String object = null; + S3Handler handler = null; + try { + if (bucketMatcher.matches()) { + pathStr = path.substring(S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX.length() + 1); + bucket = URLDecoder.decode(pathStr, "UTF-8"); + } else if (objectMatcher.matches()) { + pathStr = path.substring(S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX.length() + 1); + bucket = URLDecoder.decode( + pathStr.substring(0, pathStr.indexOf(AlluxioURI.SEPARATOR)), "UTF-8"); + object = URLDecoder.decode( + pathStr.substring(pathStr.indexOf(AlluxioURI.SEPARATOR) + 1), "UTF-8"); + } + handler = new S3Handler(bucket, object, request, response); + handler.setStopwatch(stopwatch); + handler.init(); + S3BaseTask task = null; + if (object != null && !object.isEmpty()) { + task = S3ObjectTask.Factory.create(handler); + } else { + task = S3BucketTask.Factory.create(handler); + } + handler.setS3Task(task); + return handler; + } catch (Exception ex) { + LOG.error("Exception during create s3handler:{}", ThreadUtils.formatStackTrace(ex)); + throw ex; + } + } + + /** + * Process the response returned from S3Task core logic to write to downstream. 
+ * @param servletResponse + * @param response + * @throws IOException + */ + public static void processResponse(HttpServletResponse servletResponse, + Response response) throws IOException { + try { + // Status + servletResponse.setStatus(response.getStatus()); + // Headers + final MultivaluedMap headers = response.getStringHeaders(); + for (final Map.Entry> e : headers.entrySet()) { + final Iterator it = e.getValue().iterator(); + if (!it.hasNext()) { + continue; + } + final String header = e.getKey(); + if (servletResponse.containsHeader(header)) { + // replace any headers previously set with values from Jersey container response. + servletResponse.setHeader(header, it.next()); + } + while (it.hasNext()) { + servletResponse.addHeader(header, it.next()); + } + } + // Entity + if (response.hasEntity()) { + ServletOutputStream servletOut = servletResponse.getOutputStream(); + Object entity = response.getEntity(); + if (entity instanceof InputStream) { + InputStream is = (InputStream) entity; + byte[] bytesArray = TLS_BYTES.get(); + int read; + do { + try { + read = is.read(bytesArray); + } catch (IOException ex) { + /* Alluxio thrown IOException, remapping the exception + and send new response to downstream again */ + Response errorResponse = S3ErrorResponse.createErrorResponse(ex, ""); + S3Handler.processResponse(servletResponse, errorResponse); + return; + } + if (read == -1) { + break; + } + servletOut.write(bytesArray, 0, read); + } while (true); + } else { + String contentStr = entity.toString(); + int contentLen = contentStr.length(); + servletResponse.setContentLength(contentLen); + servletOut.write(contentStr.getBytes()); + } + } + } finally { + response.close(); + } + } + + /** + * Initialize the S3Handler object in preparation for handling the request. + * @throws Exception + */ + public void init() throws Exception { + // Do Authentication of the request. + doAuthentication(); + // Extract x-amz- headers. 
+ extractAMZHeaders(); + // Reject unsupported subresources. + rejectUnsupportedResources(); + // Init utils + ServletContext context = getServletContext(); + mMetaFS = (FileSystem) context.getAttribute(ProxyWebServer.FILE_SYSTEM_SERVLET_RESOURCE_KEY); + mAsyncAuditLogWriter = (AsyncUserAccessAuditLogWriter) context.getAttribute( + ProxyWebServer.ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY); + // Initiate the S3 API metadata directories + if (!mMetaFS.exists(new AlluxioURI(S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR))) { + mMetaFS.createDirectory(new AlluxioURI(S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR), + CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL).setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE) + .build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build()); + } + } + + /** + * get S3Task of this S3Handler. + * @return S3BaseTask + */ + public S3BaseTask getS3Task() { + return mS3Task; + } + + /** + * set S3Task for this S3Handler. + * @param task + */ + public void setS3Task(S3BaseTask task) { + mS3Task = task; + } + + /** + * get HTTP verb of this request. + * @return HTTP Verb + */ + public String getHTTPVerb() { + return mServletRequest.getMethod(); + } + + /** + * get specified HTTP header value of this request. + * @param headerName + * @return header value + */ + public String getHeader(String headerName) { + return mServletRequest.getHeader(headerName); + } + + /** + * get specified HTTP header with a default if not exist. + * @param headerName + * @param defaultHeaderValue + * @return header value + */ + public String getHeaderOrDefault(String headerName, String defaultHeaderValue) { + String headerVal = mServletRequest.getHeader(headerName); + if (headerVal == null) { + headerVal = defaultHeaderValue; + } + return headerVal; + } + + /** + * get HttpServletResponse of this request. 
+ * @return HttpServletResponse + */ + public HttpServletResponse getServletResponse() { + return mServletResponse; + } + + /** + * get HttpServletRequest of this request. + * @return HttpServletRequest + */ + public HttpServletRequest getServletRequest() { + return mServletRequest; + } + + /** + * get ServletContext from current http conversation. + * @return ServletContext + */ + public ServletContext getServletContext() { + return mServletContext; + } + + /** + * retrieve given query parameter value. + * @param queryParam + * @return query parameter value + */ + public String getQueryParameter(String queryParam) { + return mServletRequest.getParameter(queryParam); + } + + /** + * retrieve inputstream from incoming request. + * @return ServletInputStream + */ + public ServletInputStream getInputStream() throws IOException { + return mServletRequest.getInputStream(); + } + + /** + * Creates a {@link S3AuditContext} instance. + * + * @param command the command to be logged by this {@link S3AuditContext} + * @param user user name + * @param bucket bucket name + * @param object object name + * @return newly-created {@link S3AuditContext} instance + */ + public S3AuditContext createAuditContext(String command, + String user, + @Nullable String bucket, + @Nullable String object) { + // Audit log may be enabled during runtime + AsyncUserAccessAuditLogWriter auditLogWriter = null; + if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { + auditLogWriter = mAsyncAuditLogWriter; + } + S3AuditContext auditContext = new S3AuditContext(auditLogWriter); + if (auditLogWriter != null) { + String ugi = ""; + if (user != null) { + try { + String primaryGroup = CommonUtils.getPrimaryGroupName(user, Configuration.global()); + ugi = user + "," + primaryGroup; + } catch (IOException e) { + LOG.debug("Failed to get primary group for user {}.", user); + ugi = user + ",N/A"; + } + } else { + ugi = "N/A"; + } + auditContext.setUgi(ugi) + .setCommand(command) + 
.setIp(String.format("%s:%s", + mServletRequest.getRemoteAddr(), mServletRequest.getRemotePort())) + .setBucket(bucket) + .setObject(object) + .setAllowed(true) + .setSucceeded(true) + .setCreationTimeNs(System.nanoTime()); + } + return auditContext; + } + + /** + * Utility function to dump a collection into a string. + * @param prefix + * @param collection + * @return result string + */ + public String printCollection(String prefix, Collection collection) { + StringBuilder sb = new StringBuilder(prefix + ":["); + Iterator it = collection.iterator(); + while (it.hasNext()) { + sb.append(it.next().toString()); + if (it.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + return sb.toString(); + } + + /** + * Utility function to dump a map into a string. + * @param prefix + * @param map + * @return result string + */ + public String printMap(String prefix, Map map) { + StringBuilder sb = new StringBuilder(prefix + ":["); + Iterator> it = map.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = it.next(); + sb.append(entry.getKey().toString() + ":" + entry.getValue().toString()); + if (it.hasNext()) { + sb.append(","); + } + } + sb.append("]"); + return sb.toString(); + } + + /** + * Utility function to help extract x-amz- headers from request. + */ + public void extractAMZHeaders() { + java.util.Enumeration headerNamesIt = mServletRequest.getHeaderNames(); + while (headerNamesIt.hasMoreElements()) { + String header = headerNamesIt.nextElement(); + mAmzHeaderMap.putIfAbsent(header, mServletRequest.getHeader(header)); + } + } + + /** + * Reject unsupported request from the given subresources from request. 
+ * @throws S3Exception + */ + public void rejectUnsupportedResources() throws S3Exception { + java.util.Enumeration parameterNamesIt = mServletRequest.getParameterNames(); + while (parameterNamesIt.hasMoreElements()) { + if (mUnsupportedSubResourcesSet.contains(parameterNamesIt.nextElement())) { + throw new S3Exception(S3Constants.EMPTY, S3ErrorCode.NOT_IMPLEMENTED); + } + } + } + + /** + * Do S3 request authentication. + * @throws Exception + */ + public void doAuthentication() throws Exception { + try { + String authorization = mServletRequest.getHeader("Authorization"); + String user = S3RestUtils.getUser(authorization, mServletRequest); + // replace the authorization header value to user + LOG.debug("request origin Authorization Header is: {}, new user header is: {}", + authorization, user); + mUser = user; + } catch (Exception e) { + LOG.warn("exception happened in Authentication."); + throw e; + } + } + + /** + * Get the user name of this request. + * @return user name + */ + public String getUser() { + return mUser; + } + + /** + * Get the bucket name of this request. + * @return bucket name + */ + public String getBucket() { + return mBucket; + } + + /** + * Get the object name of this request. + * @return object name + */ + public String getObject() { + return mObject; + } + + /** + * Get system user FileSystem object. + * @return FileSystem object + */ + public FileSystem getMetaFS() { + return mMetaFS; + } + + /** + * Get Stopwatch object used for recording this request's latency. + * @return Stopwatch object + */ + public Stopwatch getStopwatch() { + return mStopwatch; + } + + /** + * Set the Stopwatch object used for recording this request's latency. 
+ * @param stopwatch + */ + public void setStopwatch(Stopwatch stopwatch) { + mStopwatch = stopwatch; + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java new file mode 100644 index 000000000000..faf232def7c1 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -0,0 +1,1413 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.WriteType; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AlluxioException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.grpc.Bits; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.PMode; +import alluxio.grpc.RenamePOptions; +import alluxio.grpc.S3SyntaxOptions; +import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.XAttrPropagationStrategy; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.proto.journal.File; +import alluxio.util.ThreadUtils; + +import 
com.codahale.metrics.Timer; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; +import com.google.common.base.Preconditions; +import com.google.common.io.BaseEncoding; +import com.google.common.io.ByteStreams; +import com.google.common.primitives.Longs; +import com.google.protobuf.ByteString; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.stream.Collectors; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +/** + * S3 Tasks to handle object level request. + * (bucket and object name provided in the request) + */ +public class S3ObjectTask extends S3BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(S3ObjectTask.class); + + protected S3ObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(mHandler.getBucket(), () -> { + throw new S3Exception(S3ErrorCode.NOT_IMPLEMENTED); + }); + } + + /** + * Concatenate bucket and object to make a full path. + * @return full path + */ + public String getObjectTaskResource() { + return mHandler.getBucket() + AlluxioURI.SEPARATOR + mHandler.getObject(); + } + + /** + * Factory for getting a S3ObjectTask. 
+ */ + public static final class Factory { + /** + * Marshall the request and create corresponding object level S3 task. + * @param handler + * @return S3ObjectTask + */ + public static S3ObjectTask create(S3Handler handler) { + switch (handler.getHTTPVerb()) { + case "GET": + if (handler.getQueryParameter("uploadId") != null) { + return new ListPartsTask(handler, OpType.ListParts); + } else if (handler.getQueryParameter("tagging") != null) { + return new GetObjectTaggingTask(handler, OpType.GetObjectTagging); + } else { + return new GetObjectTask(handler, OpType.GetObject); + } + case "PUT": + if (handler.getQueryParameter("tagging") != null) { + return new PutObjectTaggingTask(handler, OpType.PutObjectTagging); + } else if (handler.getQueryParameter("uploadId") != null) { + if (handler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + return new UploadPartTask(handler, OpType.UploadPartCopy); + } + return new UploadPartTask(handler, OpType.UploadPart); + } else { + if (handler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + return new CopyObjectTask(handler, OpType.CopyObject); + } + return new PutObjectTask(handler, OpType.PutObject); + } + case "POST": + if (handler.getQueryParameter("uploads") != null) { + return new CreateMultipartUploadTask(handler, OpType.CreateMultipartUpload); + } else if (handler.getQueryParameter("uploadId") != null) { + return new CompleteMultipartUploadTask(handler, OpType.CompleteMultipartUpload); + } + break; + case "HEAD": + return new HeadObjectTask(handler, OpType.HeadObject); + case "DELETE": + if (handler.getQueryParameter("uploadId") != null) { + return new AbortMultipartUploadTask(handler, OpType.AbortMultipartUpload); + } else if (handler.getQueryParameter("tagging") != null) { + return new DeleteObjectTaggingTask(handler, OpType.DeleteObjectTagging); + } else { + return new DeleteObjectTask(handler, OpType.DeleteObject); + } + default: + return new S3ObjectTask(handler, OpType.Unsupported); + } + return 
new S3ObjectTask(handler, OpType.Unsupported); + } + } + + private static final class ListPartsTask extends S3ObjectTask { + + public ListPartsTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String uploadId = mHandler.getQueryParameter("uploadId"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + + AlluxioURI tmpDir = new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject( + bucketPath, mHandler.getObject(), uploadId)); + try { + S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), userFs, tmpDir, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? 
new S3Exception(mHandler.getObject(), S3ErrorCode.NO_SUCH_UPLOAD) : e, + mHandler.getObject(), auditContext); + } + + try { + List statuses = userFs.listStatus(tmpDir); + statuses.sort(new S3RestUtils.URIStatusNameComparator()); + + List parts = new ArrayList<>(); + for (URIStatus status : statuses) { + parts.add(ListPartsResult.Part.fromURIStatus(status)); + } + + ListPartsResult result = new ListPartsResult(); + result.setBucket(bucketPath); + result.setKey(mHandler.getObject()); + result.setUploadId(uploadId); + result.setParts(parts); + return result; + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, tmpDir.getPath(), auditContext); + } + } + }); + } + } // end of ListPartsTask + + private static final class GetObjectTaggingTask extends S3ObjectTask { + + public GetObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI uri = new AlluxioURI(objectPath); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + try { + TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); + LOG.debug("GetObjectTagging tagData={}", tagData); + return tagData != null ? 
tagData : new TaggingData(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of GetObjectTaggingTask + + private static final class PutObjectTaggingTask extends S3ObjectTask { + + private PutObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + TaggingData tagData = null; + try { + tagData = new XmlMapper().readerFor(TaggingData.class) + .readValue(mHandler.getInputStream()); + } catch (IOException e) { + if (e.getCause() instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) e.getCause(), objectPath, + auditContext); + } + auditContext.setSucceeded(false); + throw new S3Exception(e, objectPath, S3ErrorCode.MALFORMED_XML); + } + LOG.debug("PutObjectTagging tagData={}", tagData); + Map xattrMap = new HashMap<>(); + if (tagData != null) { + try { + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + try { + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap) + .setXattrUpdateStrategy(File.XAttrUpdateStrategy.UNION_REPLACE) + .build(); + userFs.setAttribute(objectUri, attrPOptions); + } catch 
(Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + return Response.ok().build(); + } + }); + } + } // end of PutObjectTaggingTask + + private static final class GetObjectTask extends S3ObjectTask { + + public GetObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String range = mHandler.getHeaderOrDefault("Range", null); + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + try { + URIStatus status = userFs.getStatus(objectUri); + FileInStream is = userFs.openFile(objectUri); + S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); + RangeFileInStream ris = RangeFileInStream.Factory.create( + is, status.getLength(), s3Range); + + Response.ResponseBuilder res = Response.ok(ris, MediaType.APPLICATION_OCTET_STREAM_TYPE) + .lastModified(new Date(status.getLastModificationTimeMs())) + .header(S3Constants.S3_CONTENT_LENGTH_HEADER, + s3Range.getLength(status.getLength())); + + // Check range + if (s3Range.isValid()) { + res.status(Response.Status.PARTIAL_CONTENT) + .header(S3Constants.S3_ACCEPT_RANGES_HEADER, S3Constants.S3_ACCEPT_RANGES_VALUE) + .header(S3Constants.S3_CONTENT_RANGE_HEADER, + s3Range.getRealRange(status.getLength())); + } + + // Check for the object's ETag + String entityTag = S3RestUtils.getEntityTag(status); + if (entityTag != null) { + res.header(S3Constants.S3_ETAG_HEADER, entityTag); + } else { + LOG.debug("Failed to find ETag for 
object: " + objectPath); + } + + // Check if the object had a specified "Content-Type" + res.type(S3RestUtils.deserializeContentType(status.getXAttr())); + + // Check if object had tags, if so we need to return the count + // in the header "x-amz-tagging-count" + TaggingData tagData = S3RestUtils.deserializeTags(status.getXAttr()); + if (tagData != null) { + int taggingCount = tagData.getTagMap().size(); + if (taggingCount > 0) { + res.header(S3Constants.S3_TAGGING_COUNT_HEADER, taggingCount); + } + } + return res.build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of GetObjectTask + + private static final class HeadObjectTask extends S3ObjectTask { + + public HeadObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI objectUri = new AlluxioURI(objectPath); + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + try { + URIStatus status = userFs.getStatus(objectUri); + if (status.isFolder() && !mHandler.getObject().endsWith(AlluxioURI.SEPARATOR)) { + throw new FileDoesNotExistException(status.getPath() + " is a directory"); + } + Response.ResponseBuilder res = Response.ok() + .lastModified(new Date(status.getLastModificationTimeMs())) + 
.header(S3Constants.S3_CONTENT_LENGTH_HEADER, + status.isFolder() ? 0 : status.getLength()); + + // Check for the object's ETag + String entityTag = S3RestUtils.getEntityTag(status); + if (entityTag != null) { + res.header(S3Constants.S3_ETAG_HEADER, entityTag); + } else { + LOG.debug("Failed to find ETag for object: " + objectPath); + } + + // Check if the object had a specified "Content-Type" + res.type(S3RestUtils.deserializeContentType(status.getXAttr())); + return res.build(); + } catch (FileDoesNotExistException e) { + // must be null entity (content length 0) for S3A Filesystem + return Response.status(404).entity(null).header("Content-Length", "0").build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of HeadObjectTask + + private static final class CopyObjectTask extends PutObjectTask { + + public CopyObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(object, "required 'object' parameter is missing"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + final String copySourceParam = mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER); + String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) + ? 
AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + + try (S3AuditContext auditContext = mHandler.createAuditContext( + mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { + + if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { + createDirectory(objectPath, userFs, auditContext); + } + AlluxioURI objectUri = new AlluxioURI(objectPath); + + // Populate the xattr Map with the metadata tags if provided + Map xattrMap = new HashMap<>(); + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + S3RestUtils.populateTaggingInXAttr(xattrMap, taggingHeader, auditContext, objectPath); + + // populate the xAttr map with the "Content-Type" header + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + S3RestUtils.populateContentTypeInXAttr(xattrMap, contentTypeHeader); + + CreateFilePOptions filePOptions = + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .putAllXattr(xattrMap) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build(); + + try { + copySource = URLDecoder.decode(copySource, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw S3RestUtils.toObjectS3Exception(ex, objectPath, auditContext); + } + URIStatus status = null; + CreateFilePOptions.Builder copyFilePOptionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()); + + // Handle metadata directive + final String metadataDirective = mHandler.getHeader( + S3Constants.S3_METADATA_DIRECTIVE_HEADER); + if (StringUtils.equals(metadataDirective, S3Constants.Directive.REPLACE.name()) + && filePOptions.getXattrMap().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { + 
copyFilePOptionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + filePOptions.getXattrMap().get(S3Constants.CONTENT_TYPE_XATTR_KEY)); + } else { // defaults to COPY + try { + status = userFs.getStatus(new AlluxioURI(copySource)); + if (status.getFileInfo().getXAttr() != null) { + copyFilePOptionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(status.getFileInfo().getXAttr().getOrDefault( + S3Constants.CONTENT_TYPE_XATTR_KEY, + MediaType.APPLICATION_OCTET_STREAM.getBytes(S3Constants.HEADER_CHARSET)))); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + // Handle tagging directive + final String taggingDirective = mHandler.getHeader( + S3Constants.S3_TAGGING_DIRECTIVE_HEADER); + if (StringUtils.equals(taggingDirective, S3Constants.Directive.REPLACE.name()) + && filePOptions.getXattrMap().containsKey(S3Constants.TAGGING_XATTR_KEY)) { + copyFilePOptionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + filePOptions.getXattrMap().get(S3Constants.TAGGING_XATTR_KEY)); + } else { // defaults to COPY + try { + if (status == null) { + status = userFs.getStatus(new AlluxioURI(copySource)); + } + if (status.getFileInfo().getXAttr() != null + && status.getFileInfo().getXAttr() + .containsKey(S3Constants.TAGGING_XATTR_KEY)) { + copyFilePOptionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + TaggingData.serialize(S3RestUtils.deserializeTags(status.getXAttr()))); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + String entityTag = copyObject(userFs, auditContext, + objectPath, copySource, copyFilePOptionsBuilder.build()); + return new CopyObjectResult(entityTag, System.currentTimeMillis()); + } + }); + } + } // end of CopyObjectTask + + private static class PutObjectTask extends S3ObjectTask { + // For both PutObject and UploadPart + + public PutObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + 
/** + * Common function for create object. + * TODO(lucy) needs to change the central logic here of how we do overwrite + * current logic introduces unhandled race conditions + * @param objectPath + * @param userFs + * @param createFilePOptions + * @param auditContext + * @return Response + * @throws S3Exception + */ + public Response createObject(String objectPath, FileSystem userFs, + CreateFilePOptions createFilePOptions, S3AuditContext auditContext) + throws S3Exception { + AlluxioURI objectUri = new AlluxioURI(objectPath); + final String decodedLengthHeader = mHandler.getHeader("x-amz-decoded-content-length"); + final String contentLength = mHandler.getHeader("Content-Length"); + try { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + + // The request body can be in the aws-chunked encoding format, or not encoded at all + // determine if it's encoded, and then which parts of the stream to read depending on + // the encoding type. + boolean isChunkedEncoding = decodedLengthHeader != null; + long toRead; + InputStream readStream = mHandler.getInputStream(); + if (isChunkedEncoding) { + toRead = Long.parseLong(decodedLengthHeader); + readStream = new ChunkedEncodingInputStream(readStream); + } else { + toRead = Long.parseLong(contentLength); + } + try { + S3RestUtils.deleteExistObject(userFs, objectUri); + } catch (IOException | AlluxioException e) { + throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); + } + FileOutStream os = userFs.createFile(objectUri, createFilePOptions); + try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5)) { + long read = ByteStreams.copy(ByteStreams.limit(readStream, toRead), + digestOutputStream); + if (read < toRead) { + throw new IOException(String.format( + "Failed to read all required bytes from the stream. 
Read %d/%d", + read, toRead)); + } + } + + byte[] digest = md5.digest(); + String base64Digest = BaseEncoding.base64().encode(digest); + final String contentMD5 = mHandler.getHeader("Content-MD5"); + if (contentMD5 != null && !contentMD5.equals(base64Digest)) { + // The object may be corrupted, delete the written object and return an error. + try { + userFs.delete(objectUri, DeletePOptions.newBuilder().setRecursive(true).build()); + } catch (Exception e2) { + // intend to continue and return BAD_DIGEST S3Exception. + } + throw new S3Exception(objectUri.getPath(), S3ErrorCode.BAD_DIGEST); + } + + String entityTag = Hex.encodeHexString(digest); + // persist the ETag via xAttr + // TODO(czhu): try to compute the ETag prior to creating the file + // to reduce total RPC RTT + S3RestUtils.setEntityTag(userFs, objectUri, entityTag); + return Response.ok().header(S3Constants.S3_ETAG_HEADER, entityTag).build(); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + /** + * Common util func to create directory in alluxio. 
+ * @param objectPath + * @param userFs + * @param auditContext + * @return Response + * @throws S3Exception + */ + public Response createDirectory(String objectPath, FileSystem userFs, + S3AuditContext auditContext) + throws S3Exception { + // Need to create a folder + // TODO(czhu): verify S3 behaviour when ending an object path with a delimiter + // - this is a convenience method for the Alluxio fs which does not have a + // direct counterpart for S3, since S3 does not have "folders" as actual objects + try { + CreateDirectoryPOptions dirOptions = CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setAllowExists(true) + .build(); + userFs.createDirectory(new AlluxioURI(objectPath), dirOptions); + } catch (FileAlreadyExistsException e) { + // ok if directory already exists the user wanted to create it anyway + LOG.warn("attempting to create dir which already exists"); + } catch (IOException | AlluxioException e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + return Response.ok().build(); + } + + /** + * Common func for copy from a source path to target path. 
+ * @param userFs + * @param auditContext + * @param targetPath + * @param sourcePath + * @param copyFilePOption + * @return entityTag(Etag) + * @throws S3Exception + */ + public String copyObject(FileSystem userFs, S3AuditContext auditContext, + String targetPath, String sourcePath, + CreateFilePOptions copyFilePOption) + throws S3Exception { + AlluxioURI objectUri = new AlluxioURI(targetPath); + if (sourcePath.equals(targetPath)) { + // do not need to copy a file to itself, unless we are changing file attributes + // TODO(czhu): support changing metadata via CopyObject to self, + // verify for UploadPartCopy + auditContext.setSucceeded(false); + throw new S3Exception("Copying an object to itself invalid.", + targetPath, S3ErrorCode.INVALID_REQUEST); + } + try { + S3RestUtils.deleteExistObject(userFs, objectUri); + } catch (IOException | AlluxioException e) { + throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); + } + try (FileInStream in = userFs.openFile(new AlluxioURI(sourcePath)); + FileOutStream out = userFs.createFile(objectUri, copyFilePOption)) { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + try (DigestOutputStream digestOut = new DigestOutputStream(out, md5)) { + IOUtils.copyLarge(in, digestOut, new byte[8 * Constants.MB]); + byte[] digest = md5.digest(); + String entityTag = Hex.encodeHexString(digest); + // persist the ETag via xAttr + // TODO(czhu): compute the ETag prior to creating the file to reduce total RPC RTT + S3RestUtils.setEntityTag(userFs, objectUri, entityTag); + return entityTag; + } catch (IOException e) { + try { + out.cancel(); + } catch (Throwable t2) { + e.addSuppressed(t2); + } + throw e; + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, targetPath, auditContext); + } + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // PutObject / UploadPart ... 
+ final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + Preconditions.checkNotNull(bucket, "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(object, "required 'object' parameter is missing"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + + try (S3AuditContext auditContext = + mHandler.createAuditContext(mOPType.name(), user, bucket, object)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { + return createDirectory(objectPath, userFs, auditContext); + } + AlluxioURI objectUri = new AlluxioURI(objectPath); + + // Populate the xattr Map with the metadata tags if provided + Map xattrMap = new HashMap<>(); + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + S3RestUtils.populateTaggingInXAttr(xattrMap, taggingHeader, auditContext, objectPath); + + // populate the xAttr map with the "Content-Type" header + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + S3RestUtils.populateContentTypeInXAttr(xattrMap, contentTypeHeader); + + CreateFilePOptions filePOptions = + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build(); + return createObject(objectPath, userFs, filePOptions, auditContext); + } + }); + } + } // end of PutObjectTask + + private static final class UploadPartTask extends PutObjectTask { + + public UploadPartTask(S3Handler handler, OpType opType) { + super(handler, opType); 
+ } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // UploadPart related params + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + + final String partNumberStr = mHandler.getQueryParameter("partNumber"); + Integer partNumber = null; + if (StringUtils.isNotEmpty(partNumberStr)) { + try { + partNumber = Integer.parseInt(partNumberStr); + } catch (Exception ex) { + return new S3Exception(ex, object, S3ErrorCode.INVALID_ARGUMENT); + } + } + final String uploadId = mHandler.getQueryParameter("uploadId"); + Preconditions.checkNotNull(partNumber, "required 'partNumber' parameter is missing"); + Preconditions.checkNotNull(partNumber, "required 'uploadId' parameter is missing"); + + try (S3AuditContext auditContext = + mHandler.createAuditContext(mOPType.name(), user, bucket, object)) { + // This object is part of a multipart upload, should be uploaded into the temporary + // directory first. + String tmpDir = + S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId); + try { + S3RestUtils.checkStatusesForUploadId( + mHandler.getMetaFS(), userFs, new AlluxioURI(tmpDir), uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? 
new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, + object, auditContext); + } + String objectPath = tmpDir + AlluxioURI.SEPARATOR + partNumber; + // eg: /bucket/folder/object_/ + + // UploadPartCopy with source from another object + if (mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER) != null) { + final String copySourceParam = mHandler.getHeader(S3Constants.S3_COPY_SOURCE_HEADER); + String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) + ? AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + try { + copySource = URLDecoder.decode(copySource, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw S3RestUtils.toObjectS3Exception(ex, objectPath, auditContext); + } + CreateFilePOptions.Builder copyFilePOptionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()); + String entityTag = copyObject(userFs, auditContext, objectPath, + copySource, copyFilePOptionsBuilder.build()); + return new CopyPartResult(entityTag); + } + // UploadPart with source from http body + CreateFilePOptions filePOptions = + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .build(); + return createObject(objectPath, userFs, filePOptions, auditContext); + } + }); + } + } // end of UploadPartTask + + private static final class CreateMultipartUploadTask extends S3ObjectTask { + + public CreateMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // CreateMultipartUploadTask ... 
+ final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + + // Populate the xattr Map with the metadata tags if provided + Map xattrMap = new HashMap<>(); + + TaggingData tagData = null; + final String taggingHeader = mHandler.getHeader(S3Constants.S3_TAGGING_HEADER); + final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "initiateMultipartUpload", user, bucket, object)) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + if (taggingHeader != null) { // Parse the tagging header if it exists + try { + tagData = S3RestUtils.deserializeTaggingHeader( + taggingHeader, S3Handler.MAX_HEADER_METADATA_SIZE); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (S3Exception e) { + auditContext.setSucceeded(false); + throw e; // rethrow + } catch (IllegalArgumentException e) { + if (e.getCause() instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) e.getCause(), objectPath, + auditContext); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + LOG.debug("InitiateMultipartUpload tagData={}", tagData); + } + + try { + // Find an unused UUID + String uploadId; + do { + uploadId = UUID.randomUUID().toString(); + } while (mHandler.getMetaFS().exists( + new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)))); + + // Create the directory containing the upload parts + AlluxioURI multipartTemporaryDir = new AlluxioURI( + 
S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); + userFs.createDirectory(multipartTemporaryDir, CreateDirectoryPOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()).build()); + + // Create the Alluxio multipart upload metadata file + if (contentTypeHeader != null) { + xattrMap.put(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(contentTypeHeader, S3Constants.HEADER_CHARSET)); + } + xattrMap.put(S3Constants.UPLOADS_BUCKET_XATTR_KEY, + ByteString.copyFrom(mHandler.getBucket(), S3Constants.XATTR_STR_CHARSET)); + xattrMap.put(S3Constants.UPLOADS_OBJECT_XATTR_KEY, + ByteString.copyFrom(mHandler.getObject(), S3Constants.XATTR_STR_CHARSET)); + xattrMap.put(S3Constants.UPLOADS_FILE_ID_XATTR_KEY, ByteString.copyFrom( + Longs.toByteArray(userFs.getStatus(multipartTemporaryDir).getFileId()))); + mHandler.getMetaFS().createFile( + new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), + CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .putAllXattr(xattrMap) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .build() + ); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .setOwner(user) + .build(); + mHandler.getMetaFS().setAttribute(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), attrPOptions); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.apply(mHandler.getMetaFS(), userFs, bucket, object, uploadId); + } + return new InitiateMultipartUploadResult(bucket, object, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + }); + } + } // end of 
CreateMultipartUploadTask + + /** + * CompleteMultipartUploadTask. + */ + public static final class CompleteMultipartUploadTask extends S3ObjectTask { + private final boolean mKeepAliveEnabled = Configuration.getBoolean( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_KEEPALIVE_ENABLED); + private final Long mKeepAliveTime = Configuration.getMs( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_KEEPALIVE_TIME_INTERVAL); + private String mUploadId; + private FileSystem mUserFs; + private String mBucket; + private String mObject; + + /** + * Create a CompleteMultipartUploadTask. + * @param handler + * @param opType + */ + public CompleteMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public void handleTaskAsync() { + try { + final String user = mHandler.getUser(); + mBucket = mHandler.getBucket(); + mObject = mHandler.getObject(); + final String uploadId = mHandler.getQueryParameter("uploadId"); + LOG.debug("(bucket: {}, object: {}, uploadId: {}) queuing task...", + mBucket, mObject, uploadId); + HttpServletResponse httpServletResponse = mHandler.getServletResponse(); + + // Set headers before getting committed when flushing whitespaces + httpServletResponse.setContentType(MediaType.APPLICATION_XML); + + CompletableFuture respFut = new CompletableFuture<>(); + ExecutorService es = (ExecutorService) mHandler.getServletContext().getAttribute( + S3RequestServlet.PROXY_S3_V2_HEAVY_POOL); + es.submit(() -> { + Response completeMpUploadResponse = mHandler.getS3Task().continueTask(); + respFut.complete(completeMpUploadResponse); + }); + if (mKeepAliveEnabled) { + // Set status before getting committed when flushing whitespaces + httpServletResponse.setStatus(HttpServletResponse.SC_OK); + long sleepMs = 1000; + while (!respFut.isDone()) { + LOG.debug("(bucket: {}, object: {}, uploadId: {}) sleeping for {}ms...", + mBucket, mObject, uploadId, sleepMs); + try { + Thread.sleep(sleepMs); + } catch (InterruptedException e) { + 
LOG.error(e.toString()); + } + // TODO(czhu): figure out how to send whitespace characters while still + // returning a correct status code + // - calling getWriter().flush() commits the response (headers, status code, etc.) + // - https://docs.oracle.com/javaee/7/api/javax/servlet/ServletResponse.html#getWriter-- + // periodically sends white space characters to keep the connection from timing out + LOG.debug("(bucket: {}, object: {}, uploadId: {}) sending whitespace...", + mBucket, mObject, uploadId); + httpServletResponse.getWriter().print(" "); + httpServletResponse.getWriter().flush(); + sleepMs = Math.min(2 * sleepMs, mKeepAliveTime); + } + } // otherwise we perform a blocking call on future.get() + + XmlMapper mapper = new XmlMapper(); + try { + Response result = respFut.get(); + if (!mKeepAliveEnabled) { + S3Handler.processResponse(httpServletResponse, result); + } else { + // entity is already a String from a serialized CompleteMultipartUploadResult + String entityStr = result.getEntity().toString(); + httpServletResponse.getWriter().write(entityStr); + } + } catch (Exception e) { + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + S3Exception s3Exception = (S3Exception) cause; + httpServletResponse.getWriter().write(mapper.writeValueAsString( + new CompleteMultipartUploadResult(s3Exception.getErrorCode().getCode(), + s3Exception.getErrorCode().getDescription()))); + if (!mKeepAliveEnabled) { + httpServletResponse.setStatus(s3Exception.getErrorCode().getStatus().getStatusCode()); + } + } + LOG.error(ThreadUtils.formatStackTrace(cause)); + } + } catch (Exception e) { + // This try-catch is not intended to handle any exceptions, it is purely + // to ensure that encountered exceptions get logged. + LOG.error("Unhandled exception for {}/{}. 
{}", mHandler.getBucket(), + mHandler.getObject(), ThreadUtils.formatStackTrace(e)); +// throw e; + } + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // CompleteMultipartUploadTask ... + String objectPath = null; + String objTempPath = null; + mUploadId = mHandler.getQueryParameter("uploadId"); + final String bucket = mHandler.getBucket(); + final String object = mHandler.getObject(); + final String user = mHandler.getUser(); + mUserFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + try { + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); + S3RestUtils.checkPathIsAlluxioDirectory(mUserFs, bucketPath, null); + objectPath = bucketPath + AlluxioURI.SEPARATOR + object; + // Check for existing multipart info files and dirs + AlluxioURI multipartTemporaryDir = new AlluxioURI( + S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, mUploadId)); + URIStatus metaStatus; + + try (com.codahale.metrics.Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CHECK_UPLOADID_STATUS_LATENCY.getName()).time()) { + metaStatus = S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), mUserFs, + multipartTemporaryDir, mUploadId).get(1); + } catch (Exception e) { + LOG.warn("checkStatusesForUploadId uploadId:{} failed. 
{}", object, + ThreadUtils.formatStackTrace(e)); + throw new S3Exception(objectPath, S3ErrorCode.NO_SUCH_UPLOAD); + } + + // Parse the HTTP request body to get the intended list of parts + CompleteMultipartUploadRequest request = parseCompleteMultipartUploadRequest(objectPath); + + // Check if the requested parts are available + List uploadedParts = validateParts(request, objectPath, multipartTemporaryDir); + + // (re)create the merged object to a temporary object path + LOG.debug("CompleteMultipartUploadTask (bucket: {}, object: {}, uploadId: {}) " + + "combining {} parts...", bucket, object, mUploadId, uploadedParts.size()); + CreateFilePOptions createFileOption = prepareForCreateTempFile(metaStatus); + objTempPath = objectPath + ".temp." + UUID.randomUUID(); + AlluxioURI objectTempUri = new AlluxioURI(objTempPath); + FileOutStream os = mUserFs.createFile(objectTempUri, createFileOption); + MessageDigest md5 = MessageDigest.getInstance("MD5"); + + try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5); + com.codahale.metrics.Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_COMPLETE_MP_UPLOAD_MERGE_LATENCY + .getName()).time()) { + for (URIStatus part : uploadedParts) { + try (FileInStream is = mUserFs.openFile(new AlluxioURI(part.getPath()))) { + ByteStreams.copy(is, digestOutputStream); + } + } + } + // persist the ETag via xAttr + String entityTag = Hex.encodeHexString(md5.digest()); + // TODO(czhu): try to compute the ETag prior to creating the file to reduce total RPC RTT + S3RestUtils.setEntityTag(mUserFs, objectTempUri, entityTag); + // rename the temp file to the target object file path + AlluxioURI objectUri = new AlluxioURI(objectPath); + mUserFs.rename(objectTempUri, objectUri, RenamePOptions.newBuilder() + .setPersist(WriteType.fromProto(createFileOption.getWriteType()).isThrough()) + .setS3SyntaxOptions(S3SyntaxOptions.newBuilder() + .setOverwrite(true) + .setIsMultipartUpload(true) + .build()) + .build()); + + 
// Remove the temporary directory containing the uploaded parts and the + // corresponding Alluxio S3 API metadata file + try (Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CLEANUP_MULTIPART_UPLOAD_LATENCY.getName()).time()) { + removePartsDirAndMPMetaFile(multipartTemporaryDir); + } + return new CompleteMultipartUploadResult(objectPath, bucket, object, entityTag); + } catch (Exception e) { + /* On exception we always check if someone completes the multipart object before us to + achieve idempotency: when a race caused by retry(most cases), the commit of + this object happens at time of rename op, check DefaultFileSystemMaster.rename. + * */ + LOG.warn("Exception during CompleteMultipartUpload:{}", ThreadUtils.formatStackTrace(e)); + if (objectPath != null) { + URIStatus objStatus = checkIfComplete(objectPath); + if (objStatus != null) { + String etag = new String(objStatus.getXAttr() + .getOrDefault(S3Constants.ETAG_XATTR_KEY, new byte[0])); + if (!etag.isEmpty()) { + LOG.info("Check for idempotency, uploadId:{} idempotency check passed.", mUploadId); + return new CompleteMultipartUploadResult(objectPath, bucket, object, etag); + } + LOG.info("Check for idempotency, uploadId:{} object path exists but no etag found.", + mUploadId); + } + } + throw S3RestUtils.toObjectS3Exception(e, object); + } finally { + // Cleanup temp obj path no matter what, if path not exist, ignore + cleanupTempPath(objTempPath); + } + }); + } + + /** + * Prepare CreateFilePOptions for create temp multipart upload file. 
+ * + * @param metaStatus multi part upload meta file status + * @return CreateFilePOptions + */ + public CreateFilePOptions prepareForCreateTempFile(URIStatus metaStatus) { + CreateFilePOptions.Builder optionsBuilder = CreateFilePOptions.newBuilder() + .setRecursive(true) + .setMode(PMode.newBuilder() + .setOwnerBits(Bits.ALL) + .setGroupBits(Bits.ALL) + .setOtherBits(Bits.NONE).build()) + .putXattr(PropertyKey.Name.S3_UPLOADS_ID_XATTR_KEY, + ByteString.copyFrom(mUploadId, StandardCharsets.UTF_8)) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setWriteType(S3RestUtils.getS3WriteType()); + // Copy Tagging xAttr if it exists + if (metaStatus.getXAttr().containsKey(S3Constants.TAGGING_XATTR_KEY)) { + optionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, + ByteString.copyFrom(metaStatus.getXAttr().get(S3Constants.TAGGING_XATTR_KEY))); + } + // Copy Content-Type Header xAttr if it exists + if (metaStatus.getXAttr().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { + optionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(metaStatus.getXAttr().get(S3Constants.CONTENT_TYPE_XATTR_KEY))); + } + return optionsBuilder.build(); + } + + /** + * Parse xml http body for CompleteMultipartUploadRequest. 
+ * + * @param objectPath + * @return CompleteMultipartUploadRequest + * @throws S3Exception + */ + public CompleteMultipartUploadRequest parseCompleteMultipartUploadRequest(String objectPath) + throws S3Exception { + CompleteMultipartUploadRequest request; + try { + request = new XmlMapper().readerFor(CompleteMultipartUploadRequest.class) + .readValue(mHandler.getInputStream()); + } catch (IllegalArgumentException | IOException e) { + LOG.error("Failed parsing CompleteMultipartUploadRequest:{}", + ThreadUtils.formatStackTrace(e)); + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath); + } + return request; + } + + /** + * Validate the parts as part of this multipart uplaod request. + * + * @param request + * @param objectPath + * @param multipartTemporaryDir + * @return List of status of the part files + * @throws S3Exception + * @throws IOException + * @throws AlluxioException + */ + public List validateParts(CompleteMultipartUploadRequest request, + String objectPath, + AlluxioURI multipartTemporaryDir) + throws S3Exception, IOException, AlluxioException { + List uploadedParts = mUserFs.listStatus(multipartTemporaryDir); + uploadedParts.sort(new S3RestUtils.URIStatusNameComparator()); + if (uploadedParts.size() < request.getParts().size()) { + throw new S3Exception(objectPath, S3ErrorCode.INVALID_PART); + } + Map uploadedPartsMap = uploadedParts.stream().collect(Collectors.toMap( + status -> Integer.parseInt(status.getName()), + status -> status + )); + int lastPartNum = request.getParts().get(request.getParts().size() - 1).getPartNumber(); + for (CompleteMultipartUploadRequest.Part part : request.getParts()) { + if (!uploadedPartsMap.containsKey(part.getPartNumber())) { + throw new S3Exception(objectPath, S3ErrorCode.INVALID_PART); + } + if (part.getPartNumber() != lastPartNum // size requirement not applicable 
to last part + && uploadedPartsMap.get(part.getPartNumber()).getLength() < Configuration.getBytes( + PropertyKey.PROXY_S3_COMPLETE_MULTIPART_UPLOAD_MIN_PART_SIZE)) { + throw new S3Exception(objectPath, S3ErrorCode.ENTITY_TOO_SMALL); + } + } + return uploadedParts; + } + + /** + * Cleanup the multipart upload temporary folder holding the parts files. + * and the meta file for this multipart. + * + * @param multipartTemporaryDir + * @throws IOException + * @throws AlluxioException + */ + public void removePartsDirAndMPMetaFile(AlluxioURI multipartTemporaryDir) + throws IOException, AlluxioException { + mUserFs.delete(multipartTemporaryDir, + DeletePOptions.newBuilder().setRecursive(true).build()); + mHandler.getMetaFS().delete(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(mUploadId)), + DeletePOptions.newBuilder().build()); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.cancelAbort(mHandler.getMetaFS(), mUserFs, + mBucket, mObject, mUploadId); + } + } + + /** + * Cleanup the temp object file for complete multipart upload. + * + * @param objTempPath + */ + public void cleanupTempPath(String objTempPath) { + if (objTempPath != null) { + try (Timer.Context ctx = MetricsSystem + .uniformTimer(MetricKey.PROXY_CLEANUP_TEMP_MULTIPART_UPLOAD_OBJ_LATENCY + .getName()).time()) { + mUserFs.delete(new AlluxioURI(objTempPath), DeletePOptions.newBuilder().build()); + } catch (Exception e) { + LOG.warn("Failed to clean up temp path:{}, {}", objTempPath, e.getMessage()); + } + } + } + + /** + * On any exception, check with Master on if the there's an object file. + * bearing the same upload id already got completed. 
+ * + * @param objectPath + * @return the status of the existing object through CompleteMultipartUpload call + */ + public URIStatus checkIfComplete(String objectPath) { + try { + URIStatus objStatus = mUserFs.getStatus(new AlluxioURI(objectPath)); + String uploadId = new String(objStatus.getXAttr() + .getOrDefault(PropertyKey.Name.S3_UPLOADS_ID_XATTR_KEY, new byte[0])); + if (objStatus.isCompleted() && StringUtils.equals(uploadId, mUploadId)) { + return objStatus; + } + } catch (IOException | AlluxioException ex) { + // can't validate if any previous attempt has succeeded + LOG.warn("Check for objectPath:{} failed:{}, unsure if the complete status.", + objectPath, ex.getMessage()); + return null; + } + return null; + } + } // end of CompleteMultipartUploadTask + + private static final class AbortMultipartUploadTask extends S3ObjectTask { + + public AbortMultipartUploadTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // AbortMultipartUploadTask ... 
+ Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser( + user, mHandler.getMetaFS()); + final String uploadId = mHandler.getQueryParameter("uploadId"); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + AlluxioURI multipartTemporaryDir = new AlluxioURI(S3RestUtils + .getMultipartTemporaryDirForObject(bucketPath, mHandler.getObject(), uploadId)); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "abortMultipartUpload", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + try { + S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), + userFs, multipartTemporaryDir, uploadId); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) + ? 
new S3Exception(mHandler.getObject(), S3ErrorCode.NO_SUCH_UPLOAD) : e, + mHandler.getObject(), auditContext); + } + + try { + userFs.delete(multipartTemporaryDir, + DeletePOptions.newBuilder().setRecursive(true).build()); + mHandler.getMetaFS().delete(new AlluxioURI( + S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), + DeletePOptions.newBuilder().build()); + if (S3Handler.MULTIPART_CLEANER_ENABLED) { + MultipartUploadCleaner.cancelAbort(mHandler.getMetaFS(), userFs, + mHandler.getBucket(), mHandler.getObject(), uploadId); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of AbortMultipartUploadTask + + private static final class DeleteObjectTaggingTask extends S3ObjectTask { + + public DeleteObjectTaggingTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // DeleteObjectTaggingTask ... 
+ Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + LOG.debug("DeleteObjectTagging object={}", mHandler.getObject()); + Map xattrMap = new HashMap<>(); + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, ByteString.copyFrom(new byte[0])); + SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() + .putAllXattr(xattrMap).setXattrUpdateStrategy(File.XAttrUpdateStrategy.DELETE_KEYS) + .build(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "deleteObjectTags", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + try { + userFs.setAttribute(new AlluxioURI(objectPath), attrPOptions); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of DeleteObjectTaggingTask + + private static final class DeleteObjectTask extends S3ObjectTask { + + public DeleteObjectTask(S3Handler handler, OpType opType) { + super(handler, opType); + } + + @Override + public Response continueTask() { + return S3RestUtils.call(getObjectTaskResource(), () -> { + // DeleteObjectTask ... 
+ Preconditions.checkNotNull(mHandler.getBucket(), "required 'bucket' parameter is missing"); + Preconditions.checkNotNull(mHandler.getObject(), "required 'object' parameter is missing"); + + final String user = mHandler.getUser(); + final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); + String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); + // Delete the object. + String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration + .get(PropertyKey.PROXY_S3_DELETE_TYPE).equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + .build(); + try (S3AuditContext auditContext = mHandler.createAuditContext( + "deleteObject", user, mHandler.getBucket(), mHandler.getObject())) { + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + try { + userFs.delete(new AlluxioURI(objectPath), options); + } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { + // intentionally do nothing, this is ok. It should result in a 204 error + // This is the same response behavior as AWS's S3. + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + // Note: the normal response for S3 delete key is 204 NO_CONTENT, not 200 OK + return Response.Status.NO_CONTENT; + }); + } + } // end of DeleteObjectTask +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java new file mode 100644 index 000000000000..78ace32a0a4b --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java @@ -0,0 +1,127 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; + +import org.eclipse.jetty.server.Request; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import javax.servlet.AsyncContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.Response; + +/** + * New architecture Request Servlet for handling s3 requests + * in replacement of JAX-RS. 
+ */ +public class S3RequestServlet extends HttpServlet { + private static final long serialVersionUID = 2966302125671934038L; + public static final String SERVICE_PREFIX = "s3"; + public static final String S3_V2_SERVICE_PATH_PREFIX = Constants.REST_API_PREFIX + + AlluxioURI.SEPARATOR + SERVICE_PREFIX; + private static final Logger LOG = LoggerFactory.getLogger(S3RequestServlet.class); + /* (Experimental for new architecture enabled by PROXY_S3_OPTIMIZED_VERSION_ENABLED) + * Processing threadpools for group of requests (for now, distinguish between + * light-weighted metadata-centric requests and heavy io requests */ + public static final String PROXY_S3_V2_LIGHT_POOL = "Proxy S3 V2 Light Pool"; + public static final String PROXY_S3_V2_HEAVY_POOL = "Proxy S3 V2 Heavy Pool"; + + /** + * Implementation to serve the HttpServletRequest and returns HttpServletResponse. + * @param request the {@link HttpServletRequest} object that + * contains the request the client made of + * the servlet + * + * @param response the {@link HttpServletResponse} object that + * contains the response the servlet returns + * to the client + * + * @throws ServletException + * @throws IOException + */ + @Override + public void service(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + String target = request.getRequestURI(); + if (!target.startsWith(S3_V2_SERVICE_PATH_PREFIX)) { + return; + } + S3Handler s3Handler = null; + try { + s3Handler = S3Handler.createHandler(target, request, response); + } catch (Exception ex) { + Response errorResponse = S3ErrorResponse.createErrorResponse(ex, ""); + S3Handler.processResponse(response, errorResponse); + return; + } + ((ConcurrentHashMap) getServletContext() + .getAttribute(ProxyWebServer.PROXY_S3_HANDLER_MAP)) + .put((Request) request, s3Handler); + // Handle request async + if (Configuration.getBoolean(PropertyKey.PROXY_S3_V2_ASYNC_PROCESSING_ENABLED)) { + S3BaseTask.OpTag opTag = 
s3Handler.getS3Task().mOPType.getOpTag(); + ExecutorService es = (ExecutorService) (opTag == S3BaseTask.OpTag.LIGHT + ? getServletContext().getAttribute(PROXY_S3_V2_LIGHT_POOL) + : getServletContext().getAttribute(PROXY_S3_V2_HEAVY_POOL)); + + final AsyncContext asyncCtx = request.startAsync(); + final S3Handler s3HandlerAsync = s3Handler; + es.submit(() -> { + try { + serveRequest(s3HandlerAsync); + } catch (Throwable th) { + try { + ((HttpServletResponse) asyncCtx.getResponse()).sendError( + HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } catch (Throwable sendErrorEx) { + LOG.error("Unexpected exception for {}/{}. {}", s3HandlerAsync.getBucket(), + s3HandlerAsync.getObject(), ThreadUtils.formatStackTrace(sendErrorEx)); + } + } finally { + asyncCtx.complete(); + } + }); + } + // Handle request in current context + else { + serveRequest(s3Handler); + } + } + + /** + * Core place to call S3 task's core API logic handling + * function w/o exception handling. + * @param s3Handler + * @throws IOException + */ + public void serveRequest(S3Handler s3Handler) throws IOException { + if (s3Handler.getS3Task().getOPType() == S3BaseTask.OpType.CompleteMultipartUpload) { + s3Handler.getS3Task().handleTaskAsync(); + return; + } + Response resp = s3Handler.getS3Task().continueTask(); + S3Handler.processResponse(s3Handler.getServletResponse(), resp); + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index 96b324212e0b..9ed612f7e331 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -60,6 +60,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.security.auth.Subject; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.MultivaluedMap; @@ 
-539,6 +540,38 @@ public static String getUser(String authorization, ContainerRequestContext reque } } + /** + * Get username from header info from HttpServletRequest. + * + * @param authorization + * @param request + * @return user name + * @throws S3Exception + */ + public static String getUser(String authorization, HttpServletRequest request) + throws S3Exception { + if (S3RestUtils.isAuthenticationEnabled(Configuration.global())) { + return getUserFromSignature(request); + } + try { + return getUserFromAuthorization(authorization, Configuration.global()); + } catch (RuntimeException e) { + throw new S3Exception(new S3ErrorCode(S3ErrorCode.INTERNAL_ERROR.getCode(), + e.getMessage(), S3ErrorCode.INTERNAL_ERROR.getStatus())); + } + } + + private static String getUserFromSignature(HttpServletRequest request) + throws S3Exception { + AwsSignatureProcessor signatureProcessor = new AwsSignatureProcessor(request); + Authenticator authenticator = Authenticator.Factory.create(Configuration.global()); + AwsAuthInfo authInfo = signatureProcessor.getAuthInfo(); + if (authenticator.isAuthenticated(authInfo)) { + return authInfo.getAccessID(); + } + throw new S3Exception(authInfo.toString(), S3ErrorCode.INVALID_IDENTIFIER); + } + /** * Get username from parsed header info. * @@ -608,8 +641,57 @@ public static String getUserFromAuthorization(String authorization, AlluxioConfi } /** - * Comparator based on uri name, treat uri name as a Long number. + * Populate xattr with content type info from header. + * @param xattrMap + * @param contentTypeHeader */ + public static void populateContentTypeInXAttr(Map xattrMap, + String contentTypeHeader) { + if (contentTypeHeader != null) { + xattrMap.put(S3Constants.CONTENT_TYPE_XATTR_KEY, + ByteString.copyFrom(contentTypeHeader, S3Constants.HEADER_CHARSET)); + } + } + + /** + * Populate xattr map with tagging info from tagging header. 
+ * @param xattrMap + * @param taggingHeader + * @param auditContext + * @param objectPath + * @throws S3Exception + */ + public static void populateTaggingInXAttr(Map xattrMap, String taggingHeader, + S3AuditContext auditContext, String objectPath) + throws S3Exception { + TaggingData tagData = null; + if (taggingHeader != null) { // Parse the tagging header if it exists for PutObject + try { + tagData = S3RestUtils.deserializeTaggingHeader( + taggingHeader, S3Handler.MAX_HEADER_METADATA_SIZE); + } catch (IllegalArgumentException e) { + Throwable cause = e.getCause(); + if (cause instanceof S3Exception) { + throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath, + auditContext); + } + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + LOG.debug("tagData={}", tagData); + // Populate the xattr Map with the metadata tags if provided + if (tagData != null) { + try { + xattrMap.put(S3Constants.TAGGING_XATTR_KEY, TaggingData.serialize(tagData)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } + } + } + + /** + * Comparator based on uri name, treat uri name as a Long number. 
+ */ public static class URIStatusNameComparator implements Comparator, Serializable { private static final long serialVersionUID = 733270188584155565L; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java index 4bcaea62dec9..21c3d4ad1786 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java @@ -25,7 +25,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; import java.util.Map; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; /** @@ -39,7 +41,8 @@ public class AwsSignatureProcessor { LoggerFactory.getLogger(AwsSignatureProcessor.class); private static final String AUTHORIZATION = "Authorization"; - private final ContainerRequestContext mContext; + private ContainerRequestContext mContext; + private HttpServletRequest mServletRequest; /** * Create a new {@link AwsSignatureProcessor}. @@ -50,18 +53,41 @@ public AwsSignatureProcessor(ContainerRequestContext context) { mContext = context; } + /** + * Create a new {@link AwsSignatureProcessor} with HttpServletRequest + * as the info marshall source. + * Used by the new architecture in {@link alluxio.proxy.s3.S3RequestServlet} + * + * @param request + */ + public AwsSignatureProcessor(HttpServletRequest request) { + mServletRequest = request; + } + /** * Extract signature info from request. 
* @return SignatureInfo * @throws S3Exception */ public SignatureInfo parseSignature() throws S3Exception { - Map headers = S3RestUtils.fromMultiValueToSingleValueMap( - mContext.getHeaders(), true); - String authHeader = headers.get(AUTHORIZATION); - String dateHeader = headers.get(S3_SIGN_DATE); - Map queryParameters = S3RestUtils.fromMultiValueToSingleValueMap( - mContext.getUriInfo().getQueryParameters(), false); + Map queryParameters; + String authHeader; + String dateHeader; + if (mContext != null) { + Map headers = S3RestUtils.fromMultiValueToSingleValueMap( + mContext.getHeaders(), true); + authHeader = headers.get(AUTHORIZATION); + dateHeader = headers.get(S3_SIGN_DATE); + queryParameters = S3RestUtils.fromMultiValueToSingleValueMap( + mContext.getUriInfo().getQueryParameters(), false); + } else { + authHeader = mServletRequest.getHeader(AUTHORIZATION); + dateHeader = mServletRequest.getHeader(S3_SIGN_DATE); + queryParameters = new HashMap<>(); + for (Map.Entry entry : mServletRequest.getParameterMap().entrySet()) { + queryParameters.put(entry.getKey(), entry.getValue()[0]); + } + } SignatureInfo signatureInfo; if ((signatureInfo = diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index f876d736de7a..d1b90b83a3ae 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -21,10 +21,16 @@ import alluxio.metrics.MetricsSystem; import alluxio.proxy.ProxyProcess; import alluxio.proxy.s3.CompleteMultipartUploadHandler; +import alluxio.proxy.s3.S3BaseTask; +import alluxio.proxy.s3.S3Handler; +import alluxio.proxy.s3.S3RequestServlet; import alluxio.proxy.s3.S3RestExceptionMapper; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; import com.google.common.base.Stopwatch; +import org.eclipse.jetty.server.HttpChannel; +import org.eclipse.jetty.server.Request; 
import org.eclipse.jetty.servlet.ServletHolder; import org.glassfish.jersey.server.ResourceConfig; import org.glassfish.jersey.servlet.ServletContainer; @@ -34,6 +40,9 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.Collections; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.concurrent.NotThreadSafe; @@ -55,10 +64,24 @@ public final class ProxyWebServer extends WebServer { public static final String SERVER_CONFIGURATION_RESOURCE_KEY = "Server Configuration"; public static final String ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY = "Alluxio Proxy Audit Log Writer"; - private final FileSystem mFileSystem; - private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + public static final String PROXY_S3_HANDLER_MAP = "Proxy S3 Handler Map"; + public ConcurrentHashMap mS3HandlerMap = new ConcurrentHashMap<>(); + + class ProxyListener implements HttpChannel.Listener { + public void onComplete(Request request) + { + S3Handler s3Hdlr = mS3HandlerMap.get(request); + if (s3Hdlr != null) { + ProxyWebServer.logAccess(s3Hdlr.getServletRequest(), s3Hdlr.getServletResponse(), + s3Hdlr.getStopwatch(), s3Hdlr.getS3Task() != null + ? s3Hdlr.getS3Task().getOPType() : S3BaseTask.OpType.Unknown); + } else { + LOG.info("[ACCESSLOG] Request:{} onComplete.", request); + } + } + } /** * Creates a new instance of {@link ProxyWebServer}. 
@@ -72,8 +95,9 @@ public ProxyWebServer(String serviceName, InetSocketAddress address, super(serviceName, address); // REST configuration - ResourceConfig config = new ResourceConfig().packages("alluxio.proxy", "alluxio.proxy.s3", - "alluxio.proxy.s3.logging") + String[] packages = {"alluxio.proxy", "alluxio.proxy.s3", + "alluxio.proxy.s3.logging"}; + ResourceConfig config = new ResourceConfig().packages(packages) .register(JacksonProtobufObjectMapperProvider.class) .register(S3RestExceptionMapper.class); @@ -104,21 +128,51 @@ public void init() throws ServletException { @Override public void service(final ServletRequest req, final ServletResponse res) - throws ServletException, IOException { + throws ServletException, IOException { Stopwatch stopWatch = Stopwatch.createStarted(); super.service(req, res); if ((req instanceof HttpServletRequest) && (res instanceof HttpServletResponse)) { HttpServletRequest httpReq = (HttpServletRequest) req; HttpServletResponse httpRes = (HttpServletResponse) res; - logAccess(httpReq, httpRes, stopWatch); + logAccess(httpReq, httpRes, stopWatch, null); } } }; - ServletHolder servletHolder = new ServletHolder("Alluxio Proxy Web Service", servlet); - mServletContextHandler - .addServlet(servletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); - // TODO(czhu): Move S3 API logging out of CompleteMultipartUploadHandler into a logging handler + + if (Configuration.getBoolean(PropertyKey.PROXY_S3_V2_VERSION_ENABLED)) { + super.getServerConnector().addBean(new ProxyListener()); + ServletHolder s3ServletHolder = new ServletHolder("Alluxio Proxy V2 S3 Service", + new S3RequestServlet() { + @Override + public void init() throws ServletException { + super.init(); + getServletContext().setAttribute(ALLUXIO_PROXY_SERVLET_RESOURCE_KEY, proxyProcess); + getServletContext() + .setAttribute(FILE_SYSTEM_SERVLET_RESOURCE_KEY, mFileSystem); + getServletContext().setAttribute(STREAM_CACHE_SERVLET_RESOURCE_KEY, + new 
StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS))); + getServletContext().setAttribute(ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY, + mAsyncAuditLogWriter); + + getServletContext().setAttribute(PROXY_S3_V2_LIGHT_POOL, + new ThreadPoolExecutor(8, 64, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(64 * 1024), + ThreadFactoryUtils.build("S3-LIGHTPOOL-%d", false))); + getServletContext().setAttribute(PROXY_S3_V2_HEAVY_POOL, + new ThreadPoolExecutor(8, 64, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(64 * 1024), + ThreadFactoryUtils.build("S3-HEAVYPOOL-%d", false))); + getServletContext().setAttribute(PROXY_S3_HANDLER_MAP, mS3HandlerMap); + } + }); + mServletContextHandler + .addServlet(s3ServletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); + return; + } addHandler(new CompleteMultipartUploadHandler(mFileSystem, Constants.REST_API_PREFIX)); + ServletHolder rsServletHolder = new ServletHolder("Alluxio Proxy Web Service", servlet); + mServletContextHandler + .addServlet(rsServletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); } @Override @@ -136,18 +190,19 @@ public void stop() throws Exception { * @param request * @param response * @param stopWatch + * @param opType */ public static void logAccess(HttpServletRequest request, HttpServletResponse response, - Stopwatch stopWatch) { + Stopwatch stopWatch, S3BaseTask.OpType opType) { String contentLenStr = "None"; if (request.getHeader("x-amz-decoded-content-length") != null) { contentLenStr = request.getHeader("x-amz-decoded-content-length"); } else if (request.getHeader("Content-Length") != null) { contentLenStr = request.getHeader("Content-Length"); } - String accessLog = String.format("[ACCESSLOG] Request:%s - Status:%d " + String accessLog = String.format("[ACCESSLOG] %s Request:%s - Status:%d " + "- ContentLength:%s - Elapsed(ms):%d", - request, response.getStatus(), + (opType == null ? 
"" : opType), request, response.getStatus(), contentLenStr, stopWatch.elapsed(TimeUnit.MILLISECONDS)); if (LOG.isDebugEnabled()) { String requestHeaders = Collections.list(request.getHeaderNames()).stream() From 7399c3877af535fad1a725cf56405b1c04b8cb89 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Mon, 13 Feb 2023 10:54:44 +0800 Subject: [PATCH 107/334] Make version service return unavailable on standby masters ### What changes are proposed in this pull request? Make version service return unavailable on standby masters ### Why are the changes needed? In this https://github.com/Alluxio/alluxio/pull/16839 PR, we adds the capability to run grpc services on standby masters. However, if one uses an old alluxio client and connects to the new master with standby master enabled, it will get some errors. This PR is used to address the issue to make the change backward compatible. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16854 change-id: cid-6c82bce1b1d6cb2649658ec09f0f8ed4e243917a --- .../java/alluxio/grpc/GrpcServerBuilder.java | 14 +++++++++++++- .../ServiceVersionClientServiceHandler.java | 19 ++++++++++++++++++- .../java/alluxio/master/MasterProcess.java | 7 +++++++ .../master/service/rpc/RpcServerService.java | 2 +- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java b/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java index 491c44e9f798..79cab8631c22 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcServerBuilder.java @@ -34,6 +34,7 @@ import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; import javax.annotation.Nullable; /** @@ -262,7 +263,18 @@ public GrpcServerBuilder sslContext(SslContext sslContext) { * @return the built {@link GrpcServer} */ public GrpcServer build() { - 
addService(new GrpcService(new ServiceVersionClientServiceHandler(mServices)) + return build(null); + } + + /** + * Build the server. + * It attaches required services and interceptors for authentication. + * + * @param nodeStateSupplier a supplier to provide the node state (PRIMARY/STANDBY) + * @return the built {@link GrpcServer} + */ + public GrpcServer build(@Nullable Supplier nodeStateSupplier) { + addService(new GrpcService(new ServiceVersionClientServiceHandler(mServices, nodeStateSupplier)) .disableAuthentication()); if (mGrpcReflectionEnabled) { // authentication needs to be disabled so that the grpc command line tools can call diff --git a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index e02bb4d4cca1..d48b1d294cd9 100644 --- a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -13,6 +13,8 @@ import alluxio.Constants; import alluxio.annotation.SuppressFBWarnings; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import com.google.common.collect.ImmutableSet; import io.grpc.Status; @@ -20,6 +22,8 @@ import java.util.Objects; import java.util.Set; +import java.util.function.Supplier; +import javax.annotation.Nullable; /** * This class is a gRPC handler that serves Alluxio service versions. @@ -28,19 +32,32 @@ public final class ServiceVersionClientServiceHandler extends ServiceVersionClientServiceGrpc.ServiceVersionClientServiceImplBase { /** Set of services that are going to be recognized by this versioning service. */ private final Set mServices; + @Nullable private final Supplier mNodeStateSupplier; + private final boolean mStandbyRpcEnabled = + Configuration.getBoolean(PropertyKey.STANDBY_MASTER_GRPC_ENABLED); /** * Creates service version handler that allows given services. 
* @param services services to allow + * @param nodeStateSupplier the supplier to get the node state */ - public ServiceVersionClientServiceHandler(Set services) { + public ServiceVersionClientServiceHandler( + Set services, @Nullable Supplier nodeStateSupplier) { mServices = ImmutableSet.copyOf(Objects.requireNonNull(services, "services is null")); + mNodeStateSupplier = nodeStateSupplier; } @Override @SuppressFBWarnings(value = "DB_DUPLICATE_SWITCH_CLAUSES") public void getServiceVersion(GetServiceVersionPRequest request, StreamObserver responseObserver) { + if (mStandbyRpcEnabled + && mNodeStateSupplier != null && mNodeStateSupplier.get() == NodeState.STANDBY) { + responseObserver.onError(Status.UNAVAILABLE + .withDescription("GetServiceVersion is not supported on standby master") + .asException()); + return; + } ServiceType serviceType = request.getServiceType(); if (serviceType != ServiceType.UNKNOWN_SERVICE && !mServices.contains(serviceType)) { diff --git a/core/server/master/src/main/java/alluxio/master/MasterProcess.java b/core/server/master/src/main/java/alluxio/master/MasterProcess.java index 585dc889fb03..a0a23badb789 100644 --- a/core/server/master/src/main/java/alluxio/master/MasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/MasterProcess.java @@ -276,4 +276,11 @@ private boolean pollFor(String message, Supplier waitFor, int timeoutMs public boolean waitForReady(int timeoutMs) { return waitForGrpcServerReady(timeoutMs); } + + /** + * @return the primary selector + */ + public PrimarySelector getPrimarySelector() { + return mLeaderSelector; + } } diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java index 2617b0a666e8..629c48038a6f 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java +++ 
b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java @@ -111,7 +111,7 @@ protected synchronized void startGrpcServer( LOG.info("registered service {}", type.name()); }); }); - mGrpcServer = builder.build(); + mGrpcServer = builder.build(() -> mMasterProcess.getPrimarySelector().getStateUnsafe()); try { mGrpcServer.start(); mMasterProcess.getSafeModeManager().ifPresent(SafeModeManager::notifyRpcServerStarted); From ff63c9730116f7f2a0e32cc889a540d712abcc09 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 12 Feb 2023 20:27:46 -0800 Subject: [PATCH 108/334] Bump hadoop-common from 3.2.1 to 3.2.4 in /examples Bumps hadoop-common from 3.2.1 to 3.2.4. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.hadoop:hadoop-common&package-manager=maven&previous-version=3.2.1&new-version=3.2.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---

Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#16830 change-id: cid-c9e528db52ed4d3bf78ca8ea210d525150454ca4 --- examples/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pom.xml b/examples/pom.xml index ab884853b042..c01d0d5b8a22 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -27,7 +27,7 @@ ${project.parent.basedir}/build false - 3.2.1 + 3.2.4 From 782b38c841235aabd81d071322494c5576c30852 Mon Sep 17 00:00:00 2001 From: yyongycy <39251482+yyongycy@users.noreply.github.com> Date: Tue, 14 Feb 2023 08:27:24 +0800 Subject: [PATCH 109/334] Replace ManagementFactoryHelper with ManagementFactory for java11 ### What changes are proposed in this pull request? Replace ManagementFactoryHelper with ManagementFactory for java11 ### Why are the changes needed? Java 11 doesn't support to directly call ManagementFactoryHelper, so it is necessary to find an alternative one in java11. ### Does this PR introduce any user facing changes? NA pr-link: Alluxio/alluxio#16874 change-id: cid-1e60172569c14a862603b23f013870cca605b3ff --- core/common/src/main/java/alluxio/metrics/MetricsSystem.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java index 5b9440bb4cce..c74667b5b970 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java +++ b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java @@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory; import java.lang.management.BufferPoolMXBean; +import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -163,7 +164,7 @@ public static InstanceType fromString(String text) { private static BufferPoolMXBean getDirectBufferPool() { for (BufferPoolMXBean bufferPoolMXBean - : sun.management.ManagementFactoryHelper.getBufferPoolMXBeans()) { + : 
ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) { if (bufferPoolMXBean.getName().equals("direct")) { return bufferPoolMXBean; } From 6400f4cc93ae78e6ee2965fa1941f3cc8bf510a2 Mon Sep 17 00:00:00 2001 From: Jason Tieu <6509369+tieujason330@users.noreply.github.com> Date: Tue, 14 Feb 2023 16:13:53 -0800 Subject: [PATCH 110/334] Support Maven reproducible builds ### What changes are proposed in this pull request? See title. Adds https://github.com/git-commit-id/git-commit-id-maven-plugin v.4.0.5 as a dependency. Latest version (5.0.0) requires Java 11. ### Why are the changes needed? Updating `pom.xml` to support reproducible Maven builds. See https://maven.apache.org/guides/mini/guide-reproducible-builds.html. ### Does this PR introduce any user facing changes? Adds a `git.properties` file to output directories containing commit into (can be configurable). ex: ``` #Generated by Git-Commit-Id-Plugin git.build.version=2.10.0-SNAPSHOT git.commit.id.abbrev=622dfb4 git.commit.id.full=622dfb4ca246020efaeb169b18012bd394170e9a git.commit.time=2023-02-13T21\:47\:53Z ``` pr-link: Alluxio/alluxio#16881 change-id: cid-4e90a78a4d1068b5e4cb7620c06b374c96504022 --- pom.xml | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 8f67e7c6c260..60e7bafd3f16 100644 --- a/pom.xml +++ b/pom.xml @@ -151,6 +151,7 @@ 1.11.0 3.19.6 UTF-8 + ${git.commit.time} 1.7.30 2.13.3 3.1.0-5.8.5 @@ -314,6 +315,11 @@ commons-logging 1.2 + + pl.project13.maven + git-commit-id-plugin + 4.0.5 + io.dropwizard.metrics metrics-core @@ -952,7 +958,7 @@ org.apache.maven.plugins maven-jar-plugin - 2.4 + 3.2.0 **/log4j.properties @@ -978,12 +984,12 @@ org.apache.maven.plugins maven-shade-plugin - 3.2.1 + 3.2.3 org.apache.maven.plugins maven-source-plugin - 2.3 + 3.2.1 org.apache.maven.plugins @@ -1038,7 +1044,7 @@ org.codehaus.mojo properties-maven-plugin - 1.0.0 + 1.1.0 org.codehaus.mojo @@ -1143,6 +1149,32 @@ + + + 
pl.project13.maven + git-commit-id-plugin + + + get-the-git-infos + + revision + + initialize + + + + true + ${project.build.outputDirectory}/git.properties + yyyy-MM-dd'T'HH:mm:ss'Z' + + git.commit.time + git.build.version + ^git.commit.id.(abbrev|full)$ + + full + + + com.github.spotbugs From 99923c6b955925fd9bbc0af55a7b30fdc2875f19 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Wed, 15 Feb 2023 17:20:24 +0800 Subject: [PATCH 111/334] Reduce useless async cache request when read from remote worker ### What changes are proposed in this pull request? reduce useless async cache request when read from remote worker ### Why are the changes needed? It is useless to trigger async cache when the remote worker has already cached block. ### Does this PR introduce any user facing changes? No user facing changes. pr-link: Alluxio/alluxio#16313 change-id: cid-ad4f236cf143346085b59b24122a3dc6870281e7 --- .../java/alluxio/client/file/AlluxioFileInStream.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java index f2a7f63eb7f6..138cbe4a745d 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java @@ -49,6 +49,7 @@ import java.time.Duration; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import javax.annotation.concurrent.NotThreadSafe; /** @@ -469,7 +470,13 @@ boolean triggerAsyncCaching(BlockInStream stream) { if (mPassiveCachingEnabled && mContext.hasNodeLocalWorker()) { // send request to local worker worker = mContext.getNodeLocalWorker(); - } else { // send request to data source + } else { + if (blockInfo.getLocations().stream() + .anyMatch(it -> Objects.equals(it.getWorkerAddress(), dataSource))) { + mLastBlockIdCached = blockId; + return false; + } + // send request 
to data source worker = dataSource; } try (CloseableResource blockWorker = From 6e8e0dcc18ffbf8b1a9960632a532ac3bd0e2af7 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Wed, 15 Feb 2023 17:20:49 +0800 Subject: [PATCH 112/334] Remove hardcoded version dependency in examples module Fix the security issue introduced by following link. The newest version of hadoop fix this issue. - http://horus.oa.com/advisory/HOSA-pzst-tfwm6u4cy - http://horus.oa.com/advisory/HOSA-n612-2d07um3px - http://horus.oa.com/advisory/HOSA-yn2x-x9qulrobf - http://horus.oa.com/advisory/HOSA-jatn-nizpv9w2u I remove the hadoop.version property to keep consistent with parent module. pr-link: Alluxio/alluxio#16844 change-id: cid-b9e60f612d667b0ae3daea1314593a284b65b622 --- examples/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/pom.xml b/examples/pom.xml index c01d0d5b8a22..589f009252eb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -27,7 +27,6 @@ ${project.parent.basedir}/build false - 3.2.4 From dc41fb1e1f2db0bf8bbff0c02c2aab6a088723bc Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 15 Feb 2023 17:21:39 +0800 Subject: [PATCH 113/334] [DOCFIX] Update cn version of Upgrade doc What changes are proposed in this pull request? Update cn version of Upgrade doc. Why are the changes needed? There is no corresponding Chinese documentation for upgrade. Does this PR introduce any user facing changes? More Chinese users can access Alluxio documentation more easily. 
pr-link: Alluxio/alluxio#16840 change-id: cid-be17d3c207fbbaa3211f6e3e7611a2e15741b66f --- docs/cn/administration/Upgrade.md | 101 ++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/cn/administration/Upgrade.md diff --git a/docs/cn/administration/Upgrade.md b/docs/cn/administration/Upgrade.md new file mode 100644 index 000000000000..62451b04e444 --- /dev/null +++ b/docs/cn/administration/Upgrade.md @@ -0,0 +1,101 @@ +--- +layout: global +title: 升级 +nickname: 升级 +group: Administration +priority: 7 +--- + +* Table of Contents +{:toc} + +## 基础升级流程 + +正常情况下,用户可以直接关闭当前的 Alluxio 进程,将 Alluxio 二进制文件更改为更高的版本,同时还按之前的方式配置 Alluxio 集群,并使用现有的日志文件夹/地址来启动 Alluxio 进程 +进行升级。Alluxio 可读取以前的日志文件并从 Journal 日志中自动恢复 Alluxio 元数据。 + +以下两种情况下master日志无法向后兼容,需要采取额外的步骤来升级 Alluxio 集群: + +- 从 Alluxio 1.x 版本升级到 Alluxio 2.x 版本 +- 使用[内嵌日志]({{ '/cn/deploy/Running-Alluxio-On-a-HA-Cluster.html#选项1基于raft的嵌入式日志' | relativize_url}})的情况下从 Alluxio 2.3.x 及以下版本升级到 Alluxio 2.4.0 及以上版本 + +本文档介绍了如何将 Alluxio 升级到非向后兼容版本。 即使是要升级到可向后兼容的版本,仍然建议按照以下步骤在升级前创建备份。 + +## 创建当前版本的备份 + +Alluxio-1.8.1 版本引入了日志备份(backup)功能。 +注意,请不要在备份前修改 Alluxio 二进制文件。 +通过运行以下命令创建日志备份: + +```console +$ ./bin/alluxio fsadmin backup +Successfully backed up journal to ${BACKUP_PATH} +``` + +`${BACKUP_PATH}` 将根据日志的日期和配置确定。 +备份文件将默认保存到集群根 UFS 的 `alluxio.master.backup.directory` 中, +也可以使用 `backup [local_address] --local` 命令将文件备份到当前 leading master 节点的本地文件系统中。 + +## 升级并从备份启动 + +停止现有的 Alluxio 集群后,下载并解压新版本的 Alluxio 。 +从 `/conf` 目录拷贝旧的配置文件。然后通过以下命令将集群格式化: + +```console +$ ./bin/alluxio format +``` +- **警告:** 该操作会对 Alluxio worker 上的内存虚拟硬盘(ramdisk)进行格式化(即:删除其中的内容)。 +如果您希望保留 worker 上的内存虚拟硬盘,请参阅 + [Alluxio worker 内存虚拟硬盘缓存持久化]({{ '/cn/administration/Upgrade.html' | relativize_url}}#alluxio-worker-ramdisk-cache-persistence)。 + +然后使用 `-i ${BACKUP_PATH}` 参数启动集群, +将 `${BACKUP_PATH}` 替换为具体的备份路径。 + +```console +$ ./bin/alluxio-start.sh -i ${BACKUP_PATH} all +``` + +注意这里的 `${BACKUP_PATH}` 应该是类似 HDFS 地址的完整路径,可以被所有 Alluxio master 
访问。 +如果要备份到本地文件系统路径,需将备份文件复制到所有 master 节点上的同一位置,然后通过本地备份文件路径启动所有 master。 + +## 升级客户端和服务器 + +Alluxio 2.x 版本对 RPC 层进行了重大修改, +因此 2.0.0 之前版本的客户端不能与 2.0.0 之后版本的服务器一起运行,反之亦然。 +如果要使用 Alluxio-2.x 客户端需升级所有应用程序中的 Alluxio 客户端。 + +请参阅以下步骤: +1. 备份 Alluxio 中文件的元数据。请参阅有关 `backup` 命令的[文档]({{ '/cn/operation/Admin-CLI.html' | relativize_url }}#backup)。 +2. 停止 Alluxio 集群。 +```console +$ ./bin/alluxio-stop.sh all +``` +3. 更新所有应用程序的 Alluxio 客户端 jar 路径。例如`Yarn`, `Spark`, `Hive` 和 `Presto`,在 Cloudera Manager 的 "YARN (包括MR2)" 部分,在 "Configuration" 选项卡中,搜索参数 "Gateway Client Environment Advanced Configuration Snippet (Safety Valve) for hadoop-env.sh"。然后将以下行添加到脚本中: +```console +$ export HADOOP_CLASSPATH={{site.ALLUXIO_CLIENT_JAR_PATH}}:${HADOOP_CLASSPATH} +``` +如下所示: + ![locality]({{ '/img/screenshot_cdh_compute_hadoop_classpath.png' | relativize_url }}) +4. 启动Alluxio集群 +```console +$ ./bin/alluxio-start.sh all +``` +5. 如果您已经更新了某个应用程序的 Alluxio 客户端 jar,请重新启动该应用程序,以便使用新的 Alluxio 客户端 jar。 + +## 其他选项 + +### Alluxio worker 内存磁盘缓存持久化 + +如果您已经配置了 Alluxio worker 上的内存磁盘缓存,可使用另一个存储介质(例如,主机的本地磁盘)来持久化和恢复这些缓存的内容。 + +在运行 `alluxio-stop.sh` 时加上 `-c` 参数来指定 worker 保存其内存磁盘内容的路径(worker会将内容保存到其主机的文件系统中): +``` +$ ./bin/alluxio-stop.sh workers -c ${CACHE_PATH} +``` +- **警告:** 该操作将覆盖并替换给定 `${CACHE_PATH}` 中的所有现有内容 + +然后,在运行 `alluxio-start.sh` 时加上 `-c` 参数来指定包含 worker 内存磁盘缓存内容的目录。 +``` +$ ./bin/alluxio-start.sh workers NoMount -c ${CACHE_PATH} +``` +- **警告:** 该操作将覆盖并替换已配置的 worker 内存磁盘路径中所有的现有内容。 From 60d51564167dbc05047d9435f0825f20ed047c0a Mon Sep 17 00:00:00 2001 From: tian bao <2011xuesong@gmail.com> Date: Thu, 16 Feb 2023 02:55:46 +0800 Subject: [PATCH 114/334] Fix Worker.ActiveClients is negative when load from ufs ### What changes are proposed in this pull request? Fix Worker.ActiveClients is negative when load from ufs ### Why are the changes needed? Please clarify why the changes are needed. For instance, createUfsBlockReader will invoke closeUfsBlock then invoke commitBlock then this metric decrease. 
But it forgot to increase the metric first, so it needs to be increased first. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including no. pr-link: Alluxio/alluxio#16784 change-id: cid-13d0875dc42336cc612dfed121bba8572164ed60 --- .../main/java/alluxio/worker/block/MonoBlockStore.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 546591f2b2a5..26b2757c885c 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -33,6 +33,7 @@ import alluxio.retry.RetryUtils; import alluxio.underfs.UfsManager; import alluxio.util.ThreadFactoryUtils; +import alluxio.worker.block.DefaultBlockWorker.Metrics; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.io.DelegatingBlockReader; @@ -161,6 +162,7 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset, Optional blockMeta = mLocalBlockStore.getVolatileBlockMeta(blockId); if (blockMeta.isPresent()) { reader = mLocalBlockStore.createBlockReader(sessionId, blockId, offset); + DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); } else { boolean checkUfs = options != null && (options.hasUfsPath() || options.getBlockInUfsTier()); if (!checkUfs) { @@ -169,7 +171,6 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset, // When the block does not exist in Alluxio but exists in UFS, try to open the UFS block.
reader = createUfsBlockReader(sessionId, blockId, offset, positionShort, options); } - DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); return reader; } @@ -181,7 +182,10 @@ public BlockReader createUfsBlockReader(long sessionId, long blockId, long offse try { BlockReader reader = mUnderFileSystemBlockStore.createBlockReader(sessionId, blockId, offset, positionShort, options); - return new DelegatingBlockReader(reader, () -> closeUfsBlock(sessionId, blockId)); + BlockReader blockReader = new DelegatingBlockReader(reader, + () -> closeUfsBlock(sessionId, blockId)); + Metrics.WORKER_ACTIVE_CLIENTS.inc(); + return blockReader; } catch (Exception e) { try { closeUfsBlock(sessionId, blockId); From aea58cd82f57e0336ae020a16d2f4a29c099c74e Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Wed, 15 Feb 2023 12:44:55 -0800 Subject: [PATCH 115/334] Add maven build dockerfile with jdk17 Add dockerfile with java 17 base and java 8 added for compilation, mirroring the java 11 one pr-link: Alluxio/alluxio#16885 change-id: cid-aa63908b5ef3ef2ef83a022677ec58706d29730d --- dev/jenkins/Dockerfile-jdk17 | 160 +++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 dev/jenkins/Dockerfile-jdk17 diff --git a/dev/jenkins/Dockerfile-jdk17 b/dev/jenkins/Dockerfile-jdk17 new file mode 100644 index 000000000000..ec7f017ff5cf --- /dev/null +++ b/dev/jenkins/Dockerfile-jdk17 @@ -0,0 +1,160 @@ +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. 
+# + +# See https://hub.docker.com/r/alluxio/alluxio-maven for instructions on running the image. + +FROM maven:3.6.3-openjdk-17-slim + +# reference: https://github.com/docker-library/openjdk/blob/master/8/jdk/buster/Dockerfile +# we need jdk 8 in jdk 17 so that we can compile with jdk 8 and test with jdk 17 + +ENV JAVA_HOME /usr/local/openjdk-8 +ENV PATH $JAVA_HOME/bin:$PATH + +# backwards compatibility shim +RUN { echo '#/bin/sh'; echo 'echo "$JAVA_HOME"'; } > /usr/local/bin/docker-java-home && chmod +x /usr/local/bin/docker-java-home && [ "$JAVA_HOME" = "$(docker-java-home)" ] + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + bzip2 \ + unzip \ + xz-utils \ + \ +# java.lang.UnsatisfiedLinkError: /usr/local/openjdk-11/lib/libfontmanager.so: libfreetype.so.6: cannot open shared object file: No such file or directory +# java.lang.NoClassDefFoundError: Could not initialize class sun.awt.X11FontManager +# https://github.com/docker-library/openjdk/pull/235#issuecomment-424466077 + fontconfig libfreetype6 \ + \ +# utilities for keeping Debian and OpenJDK CA certificates in sync + ca-certificates p11-kit \ +# additional packages not installed in slim base image \ + wget gnupg git \ + ; \ + rm -rf /var/lib/apt/lists/* + +# https://stackoverflow.com/questions/72978485/git-submodule-update-failed-with-fatal-detected-dubious-ownership-in-repositor +RUN git config --global safe.directory '*' + +ENV JAVA_HOME /usr/local/openjdk-8 +RUN { echo '#/bin/sh'; echo 'echo "$JAVA_HOME"'; } > /usr/local/bin/docker-java-home && chmod +x /usr/local/bin/docker-java-home && [ "$JAVA_HOME" = "$(docker-java-home)" ] # backwards compatibility +ENV PATH $JAVA_HOME/bin:$PATH + +# Default to UTF-8 file.encoding +ENV LANG C.UTF-8 + +# https://adoptopenjdk.net/upstream.html +# > +# > What are these binaries? +# > +# > These binaries are built by Red Hat on their infrastructure on behalf of the OpenJDK jdk8u and jdk11u projects. 
The binaries are created from the unmodified source code at OpenJDK. Although no formal support agreement is provided, please report any bugs you may find to https://bugs.java.com/. +# > +ENV JAVA_VERSION 8u332 +# https://github.com/docker-library/openjdk/issues/320#issuecomment-494050246 +# > +# > I am the OpenJDK 8 and 11 Updates OpenJDK project lead. +# > ... +# > While it is true that the OpenJDK Governing Board has not sanctioned those releases, they (or rather we, since I am a member) didn't sanction Oracle's OpenJDK releases either. As far as I am aware, the lead of an OpenJDK project is entitled to release binary builds, and there is clearly a need for them. +# > + +RUN set -eux; \ + \ + arch="$(dpkg --print-architecture)"; \ + case "$arch" in \ + 'amd64') \ + downloadUrl='https://github.com/AdoptOpenJDK/openjdk8-upstream-binaries/releases/download/jdk8u332-b09/OpenJDK8U-jdk_x64_linux_8u332b09.tar.gz'; \ + ;; \ + 'arm64') \ + downloadUrl='https://github.com/AdoptOpenJDK/openjdk8-upstream-binaries/releases/download/jdk8u332-b09/OpenJDK8U-jdk_aarch64_linux_8u332b09.tar.gz'; \ + ;; \ + *) echo >&2 "error: unsupported architecture: '$arch'"; exit 1 ;; \ + esac; \ + \ + wget --progress=dot:giga -O openjdk.tgz "$downloadUrl"; \ + wget --progress=dot:giga -O openjdk.tgz.asc "$downloadUrl.sign"; \ + \ + export GNUPGHOME="$(mktemp -d)"; \ +# pre-fetch Andrew Haley's (the OpenJDK 8 and 11 Updates OpenJDK project lead) key so we can verify that the OpenJDK key was signed by it +# (https://github.com/docker-library/openjdk/pull/322#discussion_r286839190) +# we pre-fetch this so that the signature it makes on the OpenJDK key can survive "import-clean" in gpg + gpg --batch --keyserver keyserver.ubuntu.com --recv-keys EAC843EBD3EFDB98CC772FADA5CD6035332FA671; \ +# TODO find a good link for users to verify this key is right (https://mail.openjdk.java.net/pipermail/jdk-updates-dev/2019-April/000951.html is one of the only mentions of it I can find); perhaps a note added to 
https://adoptopenjdk.net/upstream.html would make sense? +# no-self-sigs-only: https://salsa.debian.org/debian/gnupg2/commit/c93ca04a53569916308b369c8b218dad5ae8fe07 + gpg --batch --keyserver keyserver.ubuntu.com --keyserver-options no-self-sigs-only --recv-keys CA5F11C6CE22644D42C6AC4492EF8D39DC13168F; \ + gpg --batch --list-sigs --keyid-format 0xLONG CA5F11C6CE22644D42C6AC4492EF8D39DC13168F \ + | tee /dev/stderr \ + | grep '0xA5CD6035332FA671' \ + | grep 'Andrew Haley'; \ + gpg --batch --verify openjdk.tgz.asc openjdk.tgz; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME"; \ + \ + mkdir -p "$JAVA_HOME"; \ + tar --extract \ + --file openjdk.tgz \ + --directory "$JAVA_HOME" \ + --strip-components 1 \ + --no-same-owner \ + ; \ + rm openjdk.tgz*; \ + \ +# update "cacerts" bundle to use Debian's CA certificates (and make sure it stays up-to-date with changes to Debian's store) +# see https://github.com/docker-library/openjdk/issues/327 +# http://rabexc.org/posts/certificates-not-working-java#comment-4099504075 +# https://salsa.debian.org/java-team/ca-certificates-java/blob/3e51a84e9104823319abeb31f880580e46f45a98/debian/jks-keystore.hook.in +# https://git.alpinelinux.org/aports/tree/community/java-cacerts/APKBUILD?id=761af65f38b4570093461e6546dcf6b179d2b624#n29 + { \ + echo '#!/usr/bin/env bash'; \ + echo 'set -Eeuo pipefail'; \ + echo 'trust extract --overwrite --format=java-cacerts --filter=ca-anchors --purpose=server-auth "$JAVA_HOME/jre/lib/security/cacerts"'; \ + } > /etc/ca-certificates/update.d/docker-openjdk; \ + chmod +x /etc/ca-certificates/update.d/docker-openjdk; \ + /etc/ca-certificates/update.d/docker-openjdk; \ + \ +# https://github.com/docker-library/openjdk/issues/331#issuecomment-498834472 + find "$JAVA_HOME/lib" -name '*.so' -exec dirname '{}' ';' | sort -u > /etc/ld.so.conf.d/docker-openjdk.conf; \ + ldconfig; \ + \ +# basic smoke test to see if the default is jdk 8 + javac -version; \ + java -version + +ENV JAVA_HOME /usr/local/openjdk-17 +ENV PATH 
$JAVA_HOME/bin:$PATH + +# basic smoke test to see if the default is jdk 11 +RUN set -eux; \ + javac -version; \ + java -version + +# need to create /.config to avoid npm errors +RUN mkdir -p /home/jenkins && \ + chmod -R 777 /home/jenkins && \ + chmod g+w /etc/passwd && \ + mkdir -p /.config && \ + chmod -R 777 /.config && \ + apt-get update -y && \ + apt-get upgrade -y ca-certificates && \ + apt-get install -y build-essential fuse3 libfuse3-dev libfuse-dev make ruby ruby-dev +# jekyll for documentation +RUN gem install public_suffix:4.0.7 jekyll:4.2.2 bundler:2.3.18 +# golang for tooling +RUN ARCH=$(dpkg --print-architecture) && \ + wget https://go.dev/dl/go1.18.1.linux-${ARCH}.tar.gz && \ + tar -xvf go1.18.1.linux-${ARCH}.tar.gz && \ + mv go /usr/local +ENV GOROOT=/usr/local/go +ENV PATH=$GOROOT/bin:$PATH +# terraform for deployment scripts +RUN ARCH=$(dpkg --print-architecture) && \ + wget --quiet https://releases.hashicorp.com/terraform/1.0.1/terraform_1.0.1_linux_${ARCH}.zip && \ + unzip -o ./terraform_1.0.1_linux_${ARCH}.zip -d /usr/local/bin/ && \ + rm terraform_1.0.1_linux_${ARCH}.zip From 00da77cf69c0ca29acecc4c37aed6ba53b9e93d2 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Wed, 15 Feb 2023 16:20:22 -0800 Subject: [PATCH 116/334] Fix file fingerprint to use atomic get content hash of uploaded file ### What changes are proposed in this pull request? Currently when complete is called on a file in Alluxio, a fingerprint of the file will be created by performing a GetStauts on the file on the UFS. If due to a concurrent write, the state of the file is different than what was written through Alluxio, the fingerprint will not actually match the content of the file in Alluxio. If this happens the state of the file in Alluxio will always be out of sync with the UFS, and the file will never be updated to the most recent version. 
This is because metadata sync uses the fingerprint to see if the file needs synchronization, and if the fingerprint does not match the file in Alluxio there will be inconsistencies. This PR fixes this by having the contentHash field of the fingerprint be computed while the file is actually written on the UFS. For object stores, this means the hash is taken from the result of the call to PutObject. Unfortunately HDFS does not have a similar interface, so the content hash is taken just after the output stream is closed to complete the write. There could be a small chance that someone changes the file in this window between the two operations. pr-link: Alluxio/alluxio#16597 change-id: cid-64723be309bdb14b05613864af3b6a1bb30cba6d --- .../block/stream/BlockWorkerDataWriter.java | 6 ++ .../client/block/stream/DataWriter.java | 7 +++ .../block/stream/GrpcBlockingStream.java | 15 +++-- .../stream/GrpcDataMessageBlockingStream.java | 17 ++++-- .../client/block/stream/GrpcDataWriter.java | 16 ++++- .../block/stream/LocalFileDataWriter.java | 6 ++ .../UfsFallbackLocalFileDataWriter.java | 6 ++ .../stream/UnderFileSystemFileOutStream.java | 9 +++ .../client/file/AlluxioFileOutStream.java | 2 + .../client/block/stream/TestDataWriter.java | 6 ++ .../ManagedBlockingUfsForwarder.java | 6 ++ .../alluxio/grpc/DataMessageMarshaller.java | 3 +- .../alluxio/grpc/ReadResponseMarshaller.java | 3 +- .../alluxio/grpc/WriteRequestMarshaller.java | 3 +- .../underfs/AtomicFileOutputStream.java | 13 ++++- .../alluxio/underfs/BaseUnderFileSystem.java | 11 +++- .../java/alluxio/underfs/ContentHashable.java | 29 ++++++++++ .../java/alluxio/underfs/Fingerprint.java | 38 +++++++++--- .../underfs/ObjectLowLevelOutputStream.java | 3 +- .../java/alluxio/underfs/UnderFileSystem.java | 17 +++++- .../underfs/UnderFileSystemWithLogging.java | 25 ++++++++ .../java/alluxio/underfs/FingerprintTest.java | 8 ++- .../master/file/DefaultFileSystemMaster.java | 4 +- .../alluxio/master/file/InodeSyncStream.java | 
2 +- .../FileSystemMasterSyncMetadataTest.java | 58 +++++++++++++++++++ .../worker/grpc/AbstractWriteHandler.java | 3 + .../worker/grpc/UfsFileWriteHandler.java | 9 +++ .../worker/grpc/WriteRequestContext.java | 17 ++++++ .../src/main/proto/grpc/block_worker.proto | 3 +- .../main/proto/grpc/file_system_master.proto | 1 + core/transport/src/main/proto/proto.lock | 10 ++++ .../cli/UnderFileSystemCommonOperations.java | 35 ++++++++++- .../delegating/DelegatingUnderFileSystem.java | 7 +++ .../alluxio/underfs/cos/COSOutputStream.java | 13 ++++- .../alluxio/underfs/gcs/GCSOutputStream.java | 13 ++++- .../underfs/gcs/v2/GCSV2OutputStream.java | 31 +++++++++- .../hdfs/HdfsUnderFileOutputStream.java | 29 +++++++++- .../underfs/hdfs/HdfsUnderFileSystem.java | 2 +- .../underfs/local/LocalUnderFileSystem.java | 22 ++++++- .../underfs/obs/OBSLowLevelOutputStream.java | 14 ++++- .../alluxio/underfs/obs/OBSOutputStream.java | 13 ++++- .../obs/OBSLowLevelOutputStreamTest.java | 24 ++++++-- .../underfs/obs/OBSOutputStreamTest.java | 24 +++++--- .../underfs/oss/OSSLowLevelOutputStream.java | 15 ++++- .../alluxio/underfs/oss/OSSOutputStream.java | 13 ++++- .../oss/OSSLowLevelOutputStreamTest.java | 28 ++++++++- .../underfs/oss/OSSOutputStreamTest.java | 33 ++++++++--- .../underfs/s3a/S3ALowLevelOutputStream.java | 19 ++++-- .../alluxio/underfs/s3a/S3AOutputStream.java | 13 ++++- .../s3a/S3ALowLevelOutputStreamTest.java | 30 ++++++++-- .../underfs/s3a/S3AOutputStreamTest.java | 15 +++++ 51 files changed, 657 insertions(+), 92 deletions(-) create mode 100644 core/common/src/main/java/alluxio/underfs/ContentHashable.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java index cf3930efb883..ea1302e604bd 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java +++ 
b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockWorkerDataWriter.java @@ -27,6 +27,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -86,6 +87,11 @@ public int chunkSize() { return mChunkSize; } + @Override + public Optional getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { try { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java index 90541ebf6e2f..0f7f9665957a 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/DataWriter.java @@ -29,6 +29,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.ThreadSafe; /** @@ -97,6 +98,12 @@ public static DataWriter create(FileSystemContext context, long blockId, long bl } } + /** + * @return the content hash of the file if it is written to the UFS. Will only + * return a non-empty value after the data writer has been closed. + */ + Optional getUfsContentHash(); + /** * Writes a chunk. This method takes the ownership of this chunk even if it fails to write * the chunk. 
diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java index 3a6c5a353419..b7babff520a3 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcBlockingStream.java @@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Optional; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; @@ -243,14 +244,20 @@ public void cancel() { * Wait for server to complete the inbound stream. * * @param timeoutMs maximum time to wait for server response + * @return the last response of the stream */ - public void waitForComplete(long timeoutMs) throws IOException { + public Optional waitForComplete(long timeoutMs) throws IOException { if (mCompleted || mCanceled) { - return; + return Optional.empty(); } - while (receive(timeoutMs) != null) { + ResT prevResponse; + ResT response = null; + do { // wait until inbound stream is closed from server. 
- } + prevResponse = response; + response = receive(timeoutMs); + } while (response != null); + return Optional.ofNullable(prevResponse); } /** diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java index a07cc3614fdd..b9ba0c37110f 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataMessageBlockingStream.java @@ -20,6 +20,7 @@ import io.grpc.stub.StreamObserver; import java.io.IOException; +import java.util.Optional; import java.util.function.Function; import javax.annotation.concurrent.NotThreadSafe; @@ -104,17 +105,21 @@ public void sendDataMessage(DataMessage message, long timeoutM } @Override - public void waitForComplete(long timeoutMs) throws IOException { + public Optional waitForComplete(long timeoutMs) throws IOException { if (mResponseMarshaller == null) { - super.waitForComplete(timeoutMs); - return; + return super.waitForComplete(timeoutMs); } + // loop until the last response is received, whose result will be returned DataMessage message; + DataMessage prevMessage = null; while (!isCanceled() && (message = receiveDataMessage(timeoutMs)) != null) { - if (message.getBuffer() != null) { - message.getBuffer().release(); + if (prevMessage != null && prevMessage.getBuffer() != null) { + prevMessage.getBuffer().release(); } + prevMessage = message; } - super.waitForComplete(timeoutMs); + // note that the combineData call is responsible for releasing the buffer of prevMessage + ResT result = mResponseMarshaller.combineData(prevMessage); + return Optional.ofNullable(super.waitForComplete(timeoutMs).orElse(result)); } } diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java index 
9b57d946ea0b..bfaffc62a6c5 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/GrpcDataWriter.java @@ -36,6 +36,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -68,6 +69,9 @@ public final class GrpcDataWriter implements DataWriter { private final long mChunkSize; private final GrpcBlockingStream mStream; + /** The content hash resulting from the write operation if one is available. */ + private String mContentHash = null; + /** * The next pos to queue to the buffer. */ @@ -177,6 +181,11 @@ public long pos() { return mPosToQueue; } + @Override + public Optional getUfsContentHash() { + return Optional.ofNullable(mContentHash); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { mPosToQueue += buf.readableBytes(); @@ -239,6 +248,9 @@ public void flush() throws IOException { writeRequest, mAddress)); } posWritten = response.getOffset(); + if (response.hasContentHash()) { + mContentHash = response.getContentHash(); + } } while (mPosToQueue != posWritten); } @@ -249,7 +261,9 @@ public void close() throws IOException { return; } mStream.close(); - mStream.waitForComplete(mWriterCloseTimeoutMs); + mStream.waitForComplete(mWriterCloseTimeoutMs) + .ifPresent(writeResponse -> mContentHash = writeResponse.hasContentHash() + ? 
writeResponse.getContentHash() : null); } finally { mClient.close(); } diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java index b3cd3778350c..03b37e0b9760 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/LocalFileDataWriter.java @@ -31,6 +31,7 @@ import io.netty.buffer.ByteBuf; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -119,6 +120,11 @@ public int chunkSize() { return (int) mChunkSize; } + @Override + public Optional getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(final ByteBuf buf) throws IOException { try { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java b/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java index b534ea28f326..d4140e85846e 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/UfsFallbackLocalFileDataWriter.java @@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -82,6 +83,11 @@ public static UfsFallbackLocalFileDataWriter create(FileSystemContext context, mIsWritingToLocal = mLocalFileDataWriter != null; } + @Override + public Optional getUfsContentHash() { + return mGrpcDataWriter.getUfsContentHash(); + } + @Override public void writeChunk(ByteBuf chunk) throws IOException { if (mIsWritingToLocal) { diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java 
b/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java index fd0066ab3786..40df073e81d2 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/UnderFileSystemFileOutStream.java @@ -30,6 +30,7 @@ @NotThreadSafe public class UnderFileSystemFileOutStream extends BlockOutStream { private static final int ID_UNUSED = -1; + private final DataWriter mDataWriter; /** * Creates an instance of {@link UnderFileSystemFileOutStream} that writes to a UFS file. @@ -52,6 +53,14 @@ public static UnderFileSystemFileOutStream create(FileSystemContext context, */ protected UnderFileSystemFileOutStream(DataWriter dataWriter, WorkerNetAddress address) { super(dataWriter, Long.MAX_VALUE, address); + mDataWriter = dataWriter; + } + + /** + * @return the data writer for the stream + */ + public DataWriter getDataWriter() { + return mDataWriter; } @Override diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java index 5930ddef119e..007cffd65f81 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileOutStream.java @@ -170,6 +170,8 @@ public void close() throws IOException { } else { mUnderStorageOutputStream.close(); optionsBuilder.setUfsLength(mBytesWritten); + mUnderStorageOutputStream.getDataWriter().getUfsContentHash().ifPresent( + optionsBuilder::setContentHash); } } diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java index d9917c1364e0..e83690db2119 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataWriter.java 
@@ -15,6 +15,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Optional; /** * A {@link DataWriter} which writes data to a bytebuffer. @@ -26,6 +27,11 @@ public TestDataWriter(ByteBuffer buffer) { mBuffer = buffer; } + @Override + public Optional getUfsContentHash() { + return Optional.empty(); + } + @Override public void writeChunk(ByteBuf chunk) throws IOException { try { diff --git a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java index 5037d36c3837..cc3f9f3af616 100755 --- a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java +++ b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java @@ -37,6 +37,7 @@ import java.io.OutputStream; import java.util.List; import java.util.Map; +import javax.annotation.Nullable; /** * Forwarder for {@link UnderFileSystem} objects that works through with ForkJoinPool's @@ -288,6 +289,11 @@ public Fingerprint getParsedFingerprint(String path) { return mUfs.getParsedFingerprint(path); } + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { + return mUfs.getParsedFingerprint(path, contentHash); + } + @Override public UfsMode getOperationMode(Map physicalUfsState) { return mUfs.getOperationMode(physicalUfsState); diff --git a/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java b/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java index 828226cee616..446faa0a1d42 100644 --- a/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/DataMessageMarshaller.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import javax.annotation.Nullable; /** * Marshaller for data messages. 
@@ -104,7 +105,7 @@ public DataBuffer pollBuffer(T message) { * @param message the message to be combined * @return the message with the combined buffer */ - public abstract T combineData(DataMessage message); + public abstract T combineData(@Nullable DataMessage message); /** * Serialize the message to buffers. diff --git a/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java b/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java index d1ad40596236..cb32eebb812b 100644 --- a/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/ReadResponseMarshaller.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** @@ -83,7 +84,7 @@ protected ReadResponse deserialize(ReadableBuffer buffer) throws IOException { } @Override - public ReadResponse combineData(DataMessage message) { + public ReadResponse combineData(@Nullable DataMessage message) { if (message == null) { return null; } diff --git a/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java b/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java index b25c0bce68f8..bcb1cb4192db 100644 --- a/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java +++ b/core/common/src/main/java/alluxio/grpc/WriteRequestMarshaller.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.io.InputStream; +import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; /** @@ -99,7 +100,7 @@ protected WriteRequest deserialize(ReadableBuffer buffer) throws IOException { } @Override - public WriteRequest combineData(DataMessage message) { + public WriteRequest combineData(@Nullable DataMessage message) { if (message == null) { return null; } diff --git a/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java 
b/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java index e46967d1cff5..141317ccb663 100644 --- a/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java +++ b/core/common/src/main/java/alluxio/underfs/AtomicFileOutputStream.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -28,7 +29,7 @@ * that writing to the stream is atomic, i.e., all writes become readable only after a close. */ @NotThreadSafe -public class AtomicFileOutputStream extends OutputStream { +public class AtomicFileOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(AtomicFileOutputStream.class); private AtomicFileOutputStreamCallback mUfs; @@ -95,5 +96,15 @@ public void close() throws IOException { // TODO(chaomin): consider setMode of the ufs file. mClosed = true; } + + @Override + public Optional getContentHash() throws IOException { + // get the content hash immediately after the file has completed writing + // which will be used for generating the fingerprint of the file in Alluxio + // ideally this value would be received as a result from the close call + // so that we would be sure to have the hash relating to the file uploaded + // (but such an API is not available for the UFSs that use this stream type) + return Optional.of(mUfs.getFileStatus(mPermanentPath).getContentHash()); + } } diff --git a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java index 47bd30415c50..c43299e7ab89 100644 --- a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java @@ -109,7 +109,7 @@ public String getFingerprint(String path) { if (aclPair == null || aclPair.getFirst() == null || !aclPair.getFirst().hasExtended()) { return 
Fingerprint.create(getUnderFSType(), status).serialize(); } else { - return Fingerprint.create(getUnderFSType(), status, aclPair.getFirst()).serialize(); + return Fingerprint.create(getUnderFSType(), status, null, aclPair.getFirst()).serialize(); } } catch (Exception e) { // In certain scenarios, it is expected that the UFS path does not exist. @@ -120,14 +120,19 @@ public String getFingerprint(String path) { @Override public Fingerprint getParsedFingerprint(String path) { + return getParsedFingerprint(path, null); + } + + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { try { UfsStatus status = getStatus(path); Pair aclPair = getAclPair(path); if (aclPair == null || aclPair.getFirst() == null || !aclPair.getFirst().hasExtended()) { - return Fingerprint.create(getUnderFSType(), status); + return Fingerprint.create(getUnderFSType(), status, contentHash); } else { - return Fingerprint.create(getUnderFSType(), status, aclPair.getFirst()); + return Fingerprint.create(getUnderFSType(), status, contentHash, aclPair.getFirst()); } } catch (IOException e) { return Fingerprint.INVALID_FINGERPRINT; diff --git a/core/common/src/main/java/alluxio/underfs/ContentHashable.java b/core/common/src/main/java/alluxio/underfs/ContentHashable.java new file mode 100644 index 000000000000..2975bfe14204 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/ContentHashable.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.underfs; + +import java.io.IOException; +import java.util.Optional; + +/** + * Interface for returning the content hash. Instances of {@link java.io.OutputStream} returned by + * {@link UnderFileSystem#create} may implement this interface if the UFS returns the hash of the + * content written when the stream is closed. The content hash will then be used as part of + * the metadata fingerprint when the file is completed on the Alluxio master. + */ +public interface ContentHashable { + /** + * @return the content hash of the file written to the UFS if available + * after the stream has been closed + */ + Optional getContentHash() throws IOException; +} diff --git a/core/common/src/main/java/alluxio/underfs/Fingerprint.java b/core/common/src/main/java/alluxio/underfs/Fingerprint.java index 7fa04358103a..d44a8b5829a5 100644 --- a/core/common/src/main/java/alluxio/underfs/Fingerprint.java +++ b/core/common/src/main/java/alluxio/underfs/Fingerprint.java @@ -78,10 +78,21 @@ public enum Tag { * @return the fingerprint object */ public static Fingerprint create(String ufsName, UfsStatus status) { - if (status == null) { - return new Fingerprint(Collections.emptyMap()); - } - return new Fingerprint(Fingerprint.createTags(ufsName, status)); + return Fingerprint.create(ufsName, status, null); + } + + /** + * Parses the input string and returns the fingerprint object. 
+ * + * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} + * @param status the {@link UfsStatus} to create the fingerprint from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter + * @return the fingerprint object + */ + public static Fingerprint create(String ufsName, UfsStatus status, + @Nullable String contentHash) { + return create(ufsName, status, contentHash, null); } /** @@ -89,14 +100,21 @@ public static Fingerprint create(String ufsName, UfsStatus status) { * * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} * @param status the {@link UfsStatus} to create the fingerprint from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter * @param acl the {@link AccessControlList} to create the fingerprint from * @return the fingerprint object */ - public static Fingerprint create(String ufsName, UfsStatus status, AccessControlList acl) { + public static Fingerprint create(String ufsName, UfsStatus status, + @Nullable String contentHash, @Nullable AccessControlList acl) { if (status == null) { return new Fingerprint(Collections.emptyMap()); } - Map tagMap = Fingerprint.createTags(ufsName, status); + return finishCreate(Fingerprint.createTags(ufsName, status, contentHash), acl); + } + + private static Fingerprint finishCreate(Map tagMap, + @Nullable AccessControlList acl) { if (acl != null) { tagMap.put(Tag.ACL, acl.toString()); } @@ -108,9 +126,12 @@ public static Fingerprint create(String ufsName, UfsStatus status, AccessControl * * @param ufsName the name of the ufs, should be {@link UnderFileSystem#getUnderFSType()} * @param status the {@link UfsStatus} to create the tagmap from + * @param contentHash the hash of the contents, if null the hash will be taken from + * the {@link UfsStatus} parameter * @return the tag map object */ - private static Map 
createTags(String ufsName, UfsStatus status) { + private static Map createTags(String ufsName, UfsStatus status, + @Nullable String contentHash) { Map tagMap = new HashMap<>(); tagMap.put(Tag.UFS, ufsName); tagMap.put(Tag.OWNER, status.getOwner()); @@ -118,7 +139,8 @@ private static Map createTags(String ufsName, UfsStatus status) { tagMap.put(Tag.MODE, String.valueOf(status.getMode())); if (status instanceof UfsFileStatus) { tagMap.put(Tag.TYPE, Type.FILE.name()); - tagMap.put(Tag.CONTENT_HASH, ((UfsFileStatus) status).getContentHash()); + tagMap.put(Tag.CONTENT_HASH, contentHash == null + ? ((UfsFileStatus) status).getContentHash() : contentHash); } else { tagMap.put(Tag.TYPE, Type.DIRECTORY.name()); } diff --git a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java index 9ba2573916e3..123b77aae031 100644 --- a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java @@ -75,7 +75,8 @@ * older than clean age will be cleaned. 
*/ @NotThreadSafe -public abstract class ObjectLowLevelOutputStream extends OutputStream { +public abstract class ObjectLowLevelOutputStream extends OutputStream + implements ContentHashable { protected static final Logger LOG = LoggerFactory.getLogger(ObjectLowLevelOutputStream.class); protected final List mTmpDirs; diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java index b490feff0103..783afca7eac8 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java @@ -461,9 +461,20 @@ default AlluxioConfiguration getConfiguration() throws IOException { * @param path the path to compute the fingerprint for * @return the string representing the fingerprint */ - default Fingerprint getParsedFingerprint(String path) { - return Fingerprint.parse(getFingerprint(path)); - } + Fingerprint getParsedFingerprint(String path); + + /** + * Same as {@link #getParsedFingerprint(String)} except, will use the given content hash + * as the {@link alluxio.underfs.Fingerprint.Tag#CONTENT_HASH} field of the fingerprint + * if non-null. This is intended to be used when the file is already in Alluxio and + * a fingerprint is being created based on that file where the content hash has already + * been computed. + * @param path the path to compute the fingerprint for + * @param contentHash is used as the {@link alluxio.underfs.Fingerprint.Tag#CONTENT_HASH} + * field when creating the fingerprint. + * @return the string representing the fingerprint + */ + Fingerprint getParsedFingerprint(String path, @Nullable String contentHash); /** * An {@link UnderFileSystem} may be composed of one or more "physical UFS"s. 
This method is used diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java index a3deff1d7541..9694c6125a3c 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java @@ -609,6 +609,31 @@ public String toString() { } } + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { + try { + return call(new UfsCallable() { + @Override + public Fingerprint call() { + return mUnderFileSystem.getParsedFingerprint(path, contentHash); + } + + @Override + public String methodName() { + return "GetParsedFingerprint"; + } + + @Override + public String toString() { + return String.format("path=%s, contentHash=%s", path, contentHash); + } + }); + } catch (IOException e) { + // This is not possible. + return Fingerprint.INVALID_FINGERPRINT; + } + } + @Override public UfsMode getOperationMode(Map physicalUfsState) { return mUnderFileSystem.getOperationMode(physicalUfsState); diff --git a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java index 0e1e6fa4ad74..13eddf51c41c 100644 --- a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java +++ b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java @@ -119,6 +119,12 @@ public void createFingerprintFromUfsStatus() { assertEquals(owner, fp.getTag(Fingerprint.Tag.OWNER)); assertEquals(group, fp.getTag(Fingerprint.Tag.GROUP)); assertEquals(String.valueOf(mode), fp.getTag(Fingerprint.Tag.MODE)); + assertEquals(contentHash, fp.getTag(Fingerprint.Tag.CONTENT_HASH)); + + // create a fingerprint with a custom content hash + String contentHash2 = CommonUtils.randomAlphaNumString(10); + fp = Fingerprint.create(ufsName, fileStatus, contentHash2); + assertEquals(contentHash2, 
fp.getTag(Fingerprint.Tag.CONTENT_HASH)); } @Test @@ -131,7 +137,7 @@ public void createACLFingeprint() { CommonUtils.randomAlphaNumString(10), CommonUtils.randomAlphaNumString(10), Arrays.asList("user::rw-", "group::r--", "other::rwx")); - Fingerprint fp = Fingerprint.create(CommonUtils.randomAlphaNumString(10), status, acl); + Fingerprint fp = Fingerprint.create(CommonUtils.randomAlphaNumString(10), status, null, acl); String expected = fp.serialize(); assertNotNull(expected); assertEquals("user::rw-,group::r--,other::rwx", diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index cf9639049abd..062623fd5c5e 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1704,7 +1704,9 @@ void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePath, try (CloseableResource ufsResource = resolution.acquireUfsResource()) { UnderFileSystem ufs = ufsResource.get(); if (ufsStatus == null) { - ufsFingerprint = ufs.getParsedFingerprint(ufsPath).serialize(); + String contentHash = context.getOptions().hasContentHash() + ? 
context.getOptions().getContentHash() : null; + ufsFingerprint = ufs.getParsedFingerprint(ufsPath, contentHash).serialize(); } else { ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize(); } diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index e28b9f9dd4d0..10770e7ba740 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -832,7 +832,7 @@ private void syncExistingInodeMetadata( || !aclPair.getFirst().hasExtended()) { ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus); } else { - ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus, + ufsFpParsed = Fingerprint.create(ufs.getUnderFSType(), cachedStatus, null, aclPair.getFirst()); } } diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java index fe4cf44cc9fc..8272371b7d69 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java @@ -26,10 +26,13 @@ import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; import alluxio.file.options.DescendantType; +import alluxio.grpc.CompleteFilePOptions; +import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.WritePType; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.ManuallyScheduleHeartbeat; import alluxio.master.CoreMasterContext; @@ -38,7 +41,9 @@ import 
alluxio.master.MasterTestUtils; import alluxio.master.block.BlockMaster; import alluxio.master.block.BlockMasterFactory; +import alluxio.master.file.contexts.CompleteFileContext; import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; import alluxio.master.file.contexts.DeleteContext; import alluxio.master.file.contexts.GetStatusContext; import alluxio.master.file.contexts.ListStatusContext; @@ -54,6 +59,7 @@ import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.util.IdUtils; @@ -126,6 +132,58 @@ public void after() throws Exception { stopServices(); } + @Test + public void completeFileWithOutOfDateHash() throws Exception { + // In this test we want to simulate a concurrent write to the UFS + // while the file is being created in Alluxio. + // When creating the file in Alluxio, we will use the fingerprint + // of the created file, and not the one on the UFS. + // Thus, when performing a metadata sync there should be a fingerprint + // mismatch. 
+ AlluxioURI ufsMount = setupMockUfsS3Mount(); + String fname = "file"; + AlluxioURI uri = new AlluxioURI("/mnt/local/" + fname); + + // The fingerprint of the file created in Alluxio + String alluxioContentHash = "hashOnComplete"; + // The fingerprint of the file in the UFS + String ufsContentHash = "ufsHash"; + + AlluxioURI filePath = ufsMount.join("file"); + UfsFileStatus fileStatus = new UfsFileStatus( + "file", ufsContentHash, 0L, System.currentTimeMillis(), + "owner1", "owner1", (short) 777, null, 100L); + Mockito.doAnswer(invocation -> + Fingerprint.create("s3", fileStatus, + invocation.getArgument(1))).when(mUfs).getParsedFingerprint( + eq(filePath.toString()), anyString()); + Mockito.doAnswer(invocation -> + Fingerprint.create("s3", fileStatus)) + .when(mUfs).getParsedFingerprint( + eq(filePath.toString())); + Mockito.when(mUfs.exists(filePath.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(filePath.toString())).thenReturn(false); + Mockito.when(mUfs.isFile(filePath.toString())).thenReturn(true); + Mockito.when(mUfs.getStatus(filePath.toString())).thenReturn(fileStatus); + Mockito.when(mUfs.getOperationMode(any())).thenReturn(UfsMode.READ_WRITE); + + mFileSystemMaster.createFile(uri, CreateFileContext.mergeFrom(CreateFilePOptions + .newBuilder().setWriteType(WritePType.THROUGH))); + mFileSystemMaster.completeFile(uri, CompleteFileContext.mergeFrom( + CompleteFilePOptions.newBuilder().setContentHash(alluxioContentHash))); + + FileInfo info = mFileSystemMaster.getFileInfo(uri, GetStatusContext.defaults()); + assertEquals(alluxioContentHash, Fingerprint.parse(info.getUfsFingerprint()) + .getTag(Fingerprint.Tag.CONTENT_HASH)); + + // After syncing we should have the new version of the file with the new fingerprint + info = mFileSystemMaster.getFileInfo(uri, + GetStatusContext.mergeFrom(GetStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + assertEquals(ufsContentHash, 
Fingerprint.parse(info.getUfsFingerprint()) + .getTag(Fingerprint.Tag.CONTENT_HASH)); + } + @Test public void setAttributeOwnerGroupOnMetadataUpdate() throws Exception { AlluxioURI ufsMount = setupMockUfsS3Mount(); diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java index 8e6c6659bc85..8eff962e860c 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/AbstractWriteHandler.java @@ -409,6 +409,9 @@ protected void handleCommand(WriteRequestCommand command, T context) throws Exce */ private void replySuccess() { mContext.setDoneUnsafe(true); + mContext.getContentHash().ifPresent(contentHash -> mResponseObserver.onNext( + WriteResponse.newBuilder().setContentHash(contentHash).setOffset( + mContext.getPos()).build())); mResponseObserver.onCompleted(); } diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java index 32a574bda853..4cf496483640 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/UfsFileWriteHandler.java @@ -22,6 +22,7 @@ import alluxio.resource.CloseableResource; import alluxio.security.authentication.AuthenticatedUserInfo; import alluxio.security.authorization.Mode; +import alluxio.underfs.ContentHashable; import alluxio.underfs.UfsManager; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.options.CreateOptions; @@ -83,6 +84,14 @@ protected void completeRequest(UfsFileWriteRequestContext context) } Preconditions.checkState(context.getOutputStream() != null); context.getOutputStream().close(); + if (context.getOutputStream() instanceof ContentHashable) { + try { + ((ContentHashable) 
context.getOutputStream()).getContentHash() + .ifPresent(context::setContentHash); + } catch (IOException e) { + LOG.warn("Error getting content hash after completing file", e); + } + } CreateOptions createOptions = context.getCreateOptions(); if (createOptions != null) { try { diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java b/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java index f36652daa8a0..c271b049d4ff 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/WriteRequestContext.java @@ -14,6 +14,7 @@ import com.codahale.metrics.Counter; import com.codahale.metrics.Meter; +import java.util.Optional; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -43,6 +44,8 @@ public class WriteRequestContext { */ private long mPos; + private String mContentHash = null; + private Counter mCounter; private Meter mMeter; @@ -65,6 +68,20 @@ public T getRequest() { return mRequest; } + /** + * @return the content hash + */ + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } + + /** + * @param contentHash the content hash of the written file + */ + public void setContentHash(String contentHash) { + mContentHash = contentHash; + } + /** * @return the error */ diff --git a/core/transport/src/main/proto/grpc/block_worker.proto b/core/transport/src/main/proto/grpc/block_worker.proto index 470c6be90567..a7591f99deb3 100644 --- a/core/transport/src/main/proto/grpc/block_worker.proto +++ b/core/transport/src/main/proto/grpc/block_worker.proto @@ -105,10 +105,11 @@ message WriteRequest { } // The write response. -// next available id: 2 +// next available id: 3 message WriteResponse { optional int64 offset = 1; // Errors will be handled by standard gRPC stream APIs. 
+ optional string contentHash = 2; } // Request for caching a block asynchronously diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 6895c3151ebd..58000808ceb7 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -82,6 +82,7 @@ message CompleteFilePOptions { optional int64 ufsLength = 1; optional ScheduleAsyncPersistencePOptions asyncPersistOptions = 2; optional FileSystemMasterCommonPOptions commonOptions = 3; + optional string contentHash = 4; } message CompleteFilePRequest { /** the path of the file */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index ca2600f0a076..7564f323b38a 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -1061,6 +1061,11 @@ "id": 1, "name": "offset", "type": "int64" + }, + { + "id": 2, + "name": "contentHash", + "type": "string" } ] }, @@ -2213,6 +2218,11 @@ "id": 3, "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" + }, + { + "id": 4, + "name": "contentHash", + "type": "string" } ] }, diff --git a/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemCommonOperations.java b/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemCommonOperations.java index c37dccea8ed5..ef92a3daec3a 100644 --- a/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemCommonOperations.java +++ b/integration/tools/validation/src/main/java/alluxio/cli/UnderFileSystemCommonOperations.java @@ -13,6 +13,8 @@ import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; +import alluxio.underfs.ContentHashable; +import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; import alluxio.underfs.UfsStatus; @@ -50,6 +52,8 @@ public final class 
UnderFileSystemCommonOperations { = "The content length of the written file is %s but expected %s"; private static final String FILE_CONTENT_INCORRECT = "The content of the written file is incorrect"; + private static final String FILE_CONTENT_HASH_DOES_NOT_MATCH_UFS + = "Content hash computed during file upload does not match content hash on UFS"; private static final String FILE_EXISTS_CHECK_SHOULD_SUCCEED = "Should succeed in UnderFileSystem.exists() check, but failed"; private static final String FILE_EXISTS_CHECK_SHOULD_FAILED @@ -102,6 +106,7 @@ public void createAtomicTest() throws IOException { throw new IOException(IS_FAIL_CHECK_SHOULD_FAILED); } stream.close(); + checkContentHash(testFile, stream); if (!mUfs.isFile(testFile)) { throw new IOException(IS_FAIL_CHECK_SHOULD_SUCCEED); } @@ -151,6 +156,7 @@ public void createParentTest() throws IOException { OutputStream o = mUfs.create(testFile, CreateOptions.defaults(mConfiguration) .setCreateParent(true)); o.close(); + checkContentHash(testFile, o); if (!mUfs.exists(testFile)) { throw new IOException(FILE_EXISTS_CHECK_SHOULD_SUCCEED); } @@ -1173,12 +1179,30 @@ public void renameLargeDirectoryTest() throws Exception { private void createEmptyFile(String path) throws IOException { OutputStream o = mUfs.create(path); o.close(); + checkContentHash(path, o); } private void createTestBytesFile(String path) throws IOException { OutputStream o = mUfs.create(path); o.write(TEST_BYTES); o.close(); + checkContentHash(path, o); + } + + // should be called after the stream is closed, to be sure the content hash computed + // by the stream is equal to the content hash on the UFS + private void checkContentHash(String path, OutputStream stream) throws IOException { + if (stream instanceof ContentHashable) { + if (((ContentHashable) stream).getContentHash().isPresent()) { + String ufsHash = mUfs.getParsedFingerprint(path).getTag(Fingerprint.Tag.CONTENT_HASH); + String streamHash = mUfs.getParsedFingerprint(path, + 
((ContentHashable) stream).getContentHash().get()) + .getTag(Fingerprint.Tag.CONTENT_HASH); + if (!streamHash.equals(ufsHash)) { + throw new IOException(FILE_CONTENT_HASH_DOES_NOT_MATCH_UFS); + } + } + } } // Prepare directory tree for pagination tests @@ -1262,6 +1286,7 @@ private int prepareMultiBlockFile(String testFile) throws IOException { outputStream.write(TEST_BYTES); } outputStream.close(); + checkContentHash(testFile, outputStream); return numCopies; } @@ -1283,12 +1308,18 @@ private ObjectStorePreConfig prepareObjectStore() throws IOException { String[] childrenFiles = {"sample1.jpg", "sample2.jpg", "sample3.jpg"}; // Populate children of base directory for (String child : childrenFiles) { - mUfs.create(String.format("%s/%s", baseDirectoryKey, child)).close(); + String path = String.format("%s/%s", baseDirectoryKey, child); + OutputStream stream = mUfs.create(path); + stream.close(); + checkContentHash(path, stream); } // Populate children of sub-directories for (String subdir : subDirectories) { for (String child : childrenFiles) { - mUfs.create(String.format("%s/%s/%s", baseDirectoryKey, subdir, child)).close(); + String path = String.format("%s/%s/%s", baseDirectoryKey, subdir, child); + OutputStream stream = mUfs.create(path); + stream.close(); + checkContentHash(path, stream); } } return new ObjectStorePreConfig(baseDirectoryPath, childrenFiles, subDirectories); diff --git a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java index ff3dfbd50eae..c8c8517efd48 100755 --- a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java +++ b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java @@ -31,6 +31,8 @@ import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import org.jetbrains.annotations.Nullable; + import 
java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -182,6 +184,11 @@ public Fingerprint getParsedFingerprint(String path) { return mUfs.getParsedFingerprint(path); } + @Override + public Fingerprint getParsedFingerprint(String path, @Nullable String contentHash) { + return mUfs.getParsedFingerprint(path, contentHash); + } + @Override public UfsMode getOperationMode(Map physicalUfsState) { return mUfs.getOperationMode(physicalUfsState); diff --git a/underfs/cos/src/main/java/alluxio/underfs/cos/COSOutputStream.java b/underfs/cos/src/main/java/alluxio/underfs/cos/COSOutputStream.java index 97fff7806508..17f541128b76 100644 --- a/underfs/cos/src/main/java/alluxio/underfs/cos/COSOutputStream.java +++ b/underfs/cos/src/main/java/alluxio/underfs/cos/COSOutputStream.java @@ -11,6 +11,7 @@ package alluxio.underfs.cos; +import alluxio.underfs.ContentHashable; import alluxio.util.CommonUtils; import alluxio.util.io.PathUtils; @@ -33,6 +34,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -42,7 +44,7 @@ * local disk and copied as a complete file when the {@link #close()} method is called. */ @NotThreadSafe -public final class COSOutputStream extends OutputStream { +public final class COSOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(COSOutputStream.class); /** Bucket name of the Alluxio COS bucket. */ @@ -62,6 +64,8 @@ public final class COSOutputStream extends OutputStream { /** Flag to indicate this stream has been closed, to ensure close is only done once. */ private AtomicBoolean mClosed = new AtomicBoolean(false); + private String mContentHash; + /** * Creates a name instance of {@link COSOutputStream}. 
* @@ -155,7 +159,7 @@ public void close() throws IOException { byte[] hashBytes = mHash.digest(); meta.setContentMD5(new String(Base64.encodeBase64(hashBytes))); } - mCosClient.putObject(mBucketName, mKey, in, meta); + mContentHash = mCosClient.putObject(mBucketName, mKey, in, meta).getETag(); } catch (CosClientException e) { LOG.error("Failed to upload {}. ", mKey); throw new IOException(e); @@ -168,4 +172,9 @@ public void close() throws IOException { } return; } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/gcs/src/main/java/alluxio/underfs/gcs/GCSOutputStream.java b/underfs/gcs/src/main/java/alluxio/underfs/gcs/GCSOutputStream.java index c4269df3cfb0..a3cbe2847b63 100644 --- a/underfs/gcs/src/main/java/alluxio/underfs/gcs/GCSOutputStream.java +++ b/underfs/gcs/src/main/java/alluxio/underfs/gcs/GCSOutputStream.java @@ -11,6 +11,7 @@ package alluxio.underfs.gcs; +import alluxio.underfs.ContentHashable; import alluxio.util.CommonUtils; import alluxio.util.io.PathUtils; @@ -31,6 +32,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -40,7 +42,7 @@ * local disk and copied as a complete file when the {@link #close()} method is called. */ @NotThreadSafe -public final class GCSOutputStream extends OutputStream { +public final class GCSOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(GCSOutputStream.class); /** Bucket name of the Alluxio GCS bucket. */ @@ -64,6 +66,8 @@ public final class GCSOutputStream extends OutputStream { /** Flag to indicate this stream has been closed, to ensure close is only done once. 
*/ private AtomicBoolean mClosed = new AtomicBoolean(false); + private String mContentHash; + /** * Constructs a new stream for writing a file. * @@ -128,7 +132,7 @@ public void close() throws IOException { } else { LOG.warn("MD5 was not computed for: {}", mKey); } - mClient.putObject(mBucketName, obj); + mContentHash = mClient.putObject(mBucketName, obj).getMd5HashAsBase64(); } catch (ServiceException e) { LOG.error("Failed to upload {}.", mKey); throw new IOException(e); @@ -140,5 +144,10 @@ public void close() throws IOException { } } } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java b/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java index 83e13c22dbb0..935537db4e8b 100644 --- a/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java +++ b/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java @@ -11,6 +11,8 @@ package alluxio.underfs.gcs.v2; +import alluxio.underfs.ContentHashable; + import com.google.cloud.WriteChannel; import com.google.cloud.storage.Blob; import com.google.cloud.storage.BlobId; @@ -25,6 +27,10 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.channels.ClosedChannelException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Base64; +import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -34,7 +40,7 @@ * to arrive in Alluxio worker. */ @NotThreadSafe -public final class GCSV2OutputStream extends OutputStream { +public final class GCSV2OutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(GCSV2OutputStream.class); /** Bucket name of the Alluxio GCS bucket. 
*/ @@ -55,6 +61,9 @@ public final class GCSV2OutputStream extends OutputStream { /** The write channel of Google storage object. */ private WriteChannel mWriteChannel; + /** The MD5 hash of the file. */ + private MessageDigest mHash; + /** Flag to indicate this stream has been closed, to ensure close is only done once. */ private AtomicBoolean mClosed = new AtomicBoolean(false); @@ -73,10 +82,19 @@ public GCSV2OutputStream(String bucketName, String key, Storage client) { mClient = client; mSingleByteBuffer = ByteBuffer.allocate(1); mBlobInfo = BlobInfo.newBuilder(BlobId.of(mBucketName, mKey)).build(); + try { + mHash = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + LOG.warn("Algorithm not available for MD5 hash.", e); + mHash = null; + } } @Override public void write(int b) throws IOException { + if (mHash != null) { + mHash.update((byte) b); + } if (mWriteChannel == null) { createWriteChannel(); } @@ -98,6 +116,9 @@ public void write(byte[] b) throws IOException { @Override public void write(byte[] b, int off, int len) throws IOException { + if (mHash != null) { + mHash.update(b, off, len); + } if (mWriteChannel == null) { createWriteChannel(); } @@ -146,4 +167,12 @@ private void createWriteChannel() throws IOException { .format("Failed to create write channel of %s in %s", mKey, mBucketName), e); } } + + @Override + public Optional getContentHash() { + if (mHash != null) { + return Optional.of(Base64.getEncoder().encodeToString(mHash.digest())); + } + return Optional.empty(); + } } diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java index 7644d4e50cca..6bf3159a5bac 100644 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java @@ -11,10 +11,17 @@ package alluxio.underfs.hdfs; +import 
alluxio.underfs.ContentHashable; +import alluxio.util.UnderFileSystemUtils; + import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import java.io.IOException; import java.io.OutputStream; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** @@ -24,16 +31,22 @@ * flush intend the functionality to be sync. */ @NotThreadSafe -public class HdfsUnderFileOutputStream extends OutputStream { +public class HdfsUnderFileOutputStream extends OutputStream implements ContentHashable { /** Underlying output stream. */ private final FSDataOutputStream mOut; + private final FileSystem mFs; + private final String mPath; /** * Basic constructor. * + * @param fs the hdfs file system object + * @param path the path being written * @param out underlying stream to wrap */ - public HdfsUnderFileOutputStream(FSDataOutputStream out) { + public HdfsUnderFileOutputStream(FileSystem fs, String path, FSDataOutputStream out) { + mFs = fs; + mPath = path; mOut = out; } @@ -68,4 +81,16 @@ public void write(byte[] b) throws IOException { public void write(byte[] b, int off, int len) throws IOException { mOut.write(b, off, len); } + + @Override + public Optional getContentHash() throws IOException { + FileStatus fs = mFs.getFileStatus(new Path(mPath)); + // get the content hash immediately after the file has completed writing + // which will be used for generating the fingerprint of the file in Alluxio + // ideally this value would be received as a result from the close call + // so that we would be sure to have the hash relating to the file uploaded + // (but such an API is not available for HDFS) + return Optional.of(UnderFileSystemUtils.approximateContentHash( + fs.getLen(), fs.getModificationTime())); + } } diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java 
b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java index b723b9e3ff9c..630bb683addf 100755 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java @@ -329,7 +329,7 @@ public OutputStream createDirect(String path, CreateOptions options) throws IOEx while (retryPolicy.attempt()) { try { // TODO(chaomin): support creating HDFS files with specified block size and replication. - OutputStream outputStream = new HdfsUnderFileOutputStream( + OutputStream outputStream = new HdfsUnderFileOutputStream(hdfs, path, FileSystem.create(hdfs, new Path(path), new FsPermission(options.getMode().toShort()))); if (options.getAcl() != null) { diff --git a/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java b/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java index 400705192bb0..78bcd9ed913e 100755 --- a/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java +++ b/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java @@ -18,6 +18,7 @@ import alluxio.underfs.AtomicFileOutputStream; import alluxio.underfs.AtomicFileOutputStreamCallback; import alluxio.underfs.ConsistentUnderFileSystem; +import alluxio.underfs.ContentHashable; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; import alluxio.underfs.UfsStatus; @@ -54,6 +55,7 @@ import java.nio.file.attribute.PosixFileAttributes; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.Stack; import javax.annotation.concurrent.ThreadSafe; @@ -114,7 +116,7 @@ public OutputStream createDirect(String path, CreateOptions options) throws IOEx throw new IOException(ExceptionMessage.PARENT_CREATION_FAILED.getMessage(path)); } } - OutputStream stream = new BufferedOutputStream(new FileOutputStream(path)); + OutputStream stream = new LocalOutputStream(new 
FileOutputStream(path), path); try { setMode(path, options.getMode().toShort()); } catch (IOException e) { @@ -124,6 +126,24 @@ public OutputStream createDirect(String path, CreateOptions options) throws IOEx return stream; } + static class LocalOutputStream extends BufferedOutputStream + implements ContentHashable { + + private final String mPath; + + LocalOutputStream(OutputStream out, String path) { + super(out); + mPath = path; + } + + @Override + public Optional getContentHash() { + File file = new File(mPath); + return Optional.of(UnderFileSystemUtils.approximateContentHash( + file.length(), file.lastModified())); + } + } + @Override public boolean deleteDirectory(String path, DeleteOptions options) throws IOException { path = stripPath(path); diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java index 77daa708a20f..f95aad0a66d8 100644 --- a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSLowLevelOutputStream.java @@ -36,6 +36,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import javax.annotation.Nullable; /** @@ -56,6 +57,8 @@ public class OBSLowLevelOutputStream extends ObjectLowLevelOutputStream { */ protected volatile String mUploadId; + private String mContentHash; + /** * Constructs a new stream for writing a file. 
* @@ -124,7 +127,7 @@ protected void completeMultiPartUploadInternal() throws IOException { LOG.debug("complete multi part {}", mUploadId); CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest( mBucketName, mKey, mUploadId, mTags); - getClient().completeMultipartUpload(completeRequest); + mContentHash = getClient().completeMultipartUpload(completeRequest).getEtag(); } catch (ObsException e) { LOG.debug("failed to complete multi part upload", e); throw new IOException( @@ -155,7 +158,7 @@ protected void createEmptyObject(String key) throws IOException { PutObjectRequest request = new PutObjectRequest(mBucketName, key, new ByteArrayInputStream(new byte[0])); request.setMetadata(meta); - getClient().putObject(request); + mContentHash = getClient().putObject(request).getEtag(); } catch (ObsException e) { throw new IOException(e); } @@ -172,7 +175,7 @@ protected void putObject(String key, File file, @Nullable String md5) throws IOE PutObjectRequest request = new PutObjectRequest(mBucketName, key, file); request.setMetadata(meta); - getClient().putObject(request); + mContentHash = getClient().putObject(request).getEtag(); } catch (ObsException e) { throw new IOException(e); } @@ -181,4 +184,9 @@ protected void putObject(String key, File file, @Nullable String md5) throws IOE protected IObsClient getClient() { return mClient; } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSOutputStream.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSOutputStream.java index ad6f7199930a..d79611f11c52 100644 --- a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSOutputStream.java +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSOutputStream.java @@ -11,6 +11,7 @@ package alluxio.underfs.obs; +import alluxio.underfs.ContentHashable; import alluxio.util.CommonUtils; import alluxio.util.io.PathUtils; @@ -33,6 +34,7 @@ import 
java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -42,7 +44,7 @@ * local disk and copied as a complete file when the {@link #close()} method is called. */ @NotThreadSafe -public final class OBSOutputStream extends OutputStream { +public final class OBSOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(OBSOutputStream.class); /** @@ -76,6 +78,8 @@ public final class OBSOutputStream extends OutputStream { */ private AtomicBoolean mClosed = new AtomicBoolean(false); + private String mContentHash; + /** * Creates a name instance of {@link OBSOutputStream}. * @@ -172,7 +176,7 @@ public void close() throws IOException { byte[] hashBytes = mHash.digest(); objMeta.setContentMd5(new String(Base64.encodeBase64(hashBytes))); } - mObsClient.putObject(mBucketName, mKey, in, objMeta); + mContentHash = mObsClient.putObject(mBucketName, mKey, in, objMeta).getEtag(); } catch (ObsException e) { LOG.error("Failed to upload {}. 
Temporary file @ {}", mKey, mFile.getPath()); throw new IOException(e); @@ -184,4 +188,9 @@ public void close() throws IOException { } } } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java index d544f1101aa3..906cd5d58f57 100644 --- a/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java +++ b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSLowLevelOutputStreamTest.java @@ -11,6 +11,8 @@ package alluxio.underfs.obs; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -33,7 +35,6 @@ import com.obs.services.model.PutObjectResult; import com.obs.services.model.UploadPartRequest; import com.obs.services.model.UploadPartResult; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -93,6 +94,8 @@ public void writeByte() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockObsClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -110,15 +113,17 @@ public void writeByteArrayForSmallFile() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockObsClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test public void writeByteArrayForLargeFile() throws Exception { int partSize = (int) 
FormatUtils.parseSpaceSize(PARTITION_SIZE); byte[] b = new byte[partSize + 1]; - Assert.assertEquals(mStream.getPartNumber(), 1); + assertEquals(mStream.getPartNumber(), 1); mStream.write(b, 0, b.length); - Assert.assertEquals(mStream.getPartNumber(), 2); + assertEquals(mStream.getPartNumber(), 2); Mockito.verify(mMockObsClient) .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOutputStream).write(b, 0, b.length - 1); @@ -126,9 +131,11 @@ public void writeByteArrayForLargeFile() throws Exception { Mockito.verify(mMockExecutor).submit(any(Callable.class)); mStream.close(); - Assert.assertEquals(mStream.getPartNumber(), 3); + assertEquals(mStream.getPartNumber(), 3); Mockito.verify(mMockObsClient) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -140,6 +147,8 @@ public void createEmptyFile() throws Exception { Mockito.verify(mMockObsClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); Mockito.verify(mMockObsClient).putObject(any()); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -161,6 +170,8 @@ public void flush() throws Exception { mStream.close(); Mockito.verify(mMockObsClient) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -183,7 +194,8 @@ private void mockOSSClientAndExecutor() throws Exception { when(mMockObsClient.initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) .thenReturn(initResult); when(mMockObsClient.putObject(any(PutObjectRequest.class))) - .thenReturn(new PutObjectResult(BUCKET_NAME, KEY, "", "", "", new HashMap<>(), 200)); + .thenReturn(new PutObjectResult(BUCKET_NAME, KEY, "putTag", "", "", new 
HashMap<>(), + 200)); when(mMockObsClient.uploadPart(any(UploadPartRequest.class))) .thenAnswer((InvocationOnMock invocation) -> { @@ -194,7 +206,7 @@ private void mockOSSClientAndExecutor() throws Exception { }); when(mMockObsClient.completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) - .thenReturn(new CompleteMultipartUploadResult(BUCKET_NAME, KEY, "", "", "", "")); + .thenReturn(new CompleteMultipartUploadResult(BUCKET_NAME, KEY, "multiTag", "", "", "")); mMockTag = (ListenableFuture) PowerMockito.mock(ListenableFuture.class); when(mMockTag.get()).thenReturn(new PartEtag("someTag", 1)); diff --git a/underfs/obs/src/test/java/alluxio/underfs/obs/OBSOutputStreamTest.java b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSOutputStreamTest.java index d0a93afc97c2..9c282be7fab2 100644 --- a/underfs/obs/src/test/java/alluxio/underfs/obs/OBSOutputStreamTest.java +++ b/underfs/obs/src/test/java/alluxio/underfs/obs/OBSOutputStreamTest.java @@ -11,6 +11,8 @@ package alluxio.underfs.obs; +import static org.mockito.ArgumentMatchers.any; + import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -18,6 +20,8 @@ import com.obs.services.ObsClient; import com.obs.services.exception.ObsException; import com.obs.services.model.ObjectMetadata; +import com.obs.services.model.PutObjectResult; +import com.obs.services.model.StorageClassEnum; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -61,6 +65,8 @@ public class OBSOutputStreamTest { @Before public void before() throws Exception { mObsClient = Mockito.mock(ObsClient.class); + Mockito.when(mObsClient.putObject(any(), any(), any(InputStream.class), any())).thenReturn( + new PutObjectResult("bucket", "key", "etag", "version", StorageClassEnum.STANDARD, "url")); mFile = Mockito.mock(File.class); mLocalOutputStream = Mockito.mock(BufferedOutputStream.class); } @@ -88,9 +94,9 @@ public void testConstructor() throws Exception { 
@PrepareForTest(OBSOutputStream.class) public void testWrite1() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OBSOutputStream stream = new OBSOutputStream("testBucketName", "testKey", mObsClient, sConf.getList(PropertyKey.TMP_DIRS)); stream.write(1); @@ -106,9 +112,9 @@ public void testWrite1() throws Exception { @PrepareForTest(OBSOutputStream.class) public void testWrite2() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OBSOutputStream stream = new OBSOutputStream("testBucketName", "testKey", mObsClient, sConf.getList(PropertyKey.TMP_DIRS)); byte[] b = new byte[1]; @@ -124,9 +130,9 @@ public void testWrite2() throws Exception { @PrepareForTest(OBSOutputStream.class) public void testWrite3() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OBSOutputStream stream = new 
OBSOutputStream("testBucketName", "testKey", mObsClient, sConf.getList(PropertyKey.TMP_DIRS)); byte[] b = new byte[1]; @@ -146,10 +152,10 @@ public void testCloseError() throws Exception { String errorMessage = "Invoke the createEmptyObject method error."; BufferedInputStream inputStream = PowerMockito.mock(BufferedInputStream.class); PowerMockito.whenNew(BufferedInputStream.class) - .withArguments(Mockito.any(FileInputStream.class)).thenReturn(inputStream); + .withArguments(any(FileInputStream.class)).thenReturn(inputStream); PowerMockito .when(mObsClient.putObject(Mockito.anyString(), Mockito.anyString(), - Mockito.any(InputStream.class), Mockito.any(ObjectMetadata.class))) + any(InputStream.class), any(ObjectMetadata.class))) .thenThrow(new ObsException(errorMessage)); OBSOutputStream stream = new OBSOutputStream("testBucketName", "testKey", mObsClient, sConf.getList(PropertyKey.TMP_DIRS)); @@ -183,7 +189,7 @@ public void testCloseSuccess() throws Exception { @PrepareForTest(OBSOutputStream.class) public void testFlush() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); OBSOutputStream stream = new OBSOutputStream("testBucketName", "testKey", mObsClient, sConf.getList(PropertyKey.TMP_DIRS)); stream.flush(); diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java index e46087ac46d6..ba2d30ab96ac 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSLowLevelOutputStream.java @@ -38,6 +38,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import javax.annotation.Nullable; /** @@ -53,6 +54,8 @@ public class 
OSSLowLevelOutputStream extends ObjectLowLevelOutputStream { /** The upload id of this multipart upload. */ protected volatile String mUploadId; + private String mContentHash; + /** * Constructs a new stream for writing a file. * @@ -132,7 +135,7 @@ protected void completeMultiPartUploadInternal() throws IOException { LOG.debug("complete multi part {}", mUploadId); CompleteMultipartUploadRequest completeRequest = new CompleteMultipartUploadRequest( mBucketName, mKey, mUploadId, mTags); - getClient().completeMultipartUpload(completeRequest); + mContentHash = getClient().completeMultipartUpload(completeRequest).getETag(); } catch (OSSException | ClientException e) { LOG.debug("failed to complete multi part upload", e); throw new IOException( @@ -146,7 +149,8 @@ protected void createEmptyObject(String key) throws IOException { try { ObjectMetadata objMeta = new ObjectMetadata(); objMeta.setContentLength(0); - getClient().putObject(mBucketName, key, new ByteArrayInputStream(new byte[0]), objMeta); + mContentHash = getClient().putObject(mBucketName, key, + new ByteArrayInputStream(new byte[0]), objMeta).getETag(); } catch (OSSException | ClientException e) { throw new IOException(e); } @@ -160,12 +164,17 @@ protected void putObject(String key, File file, @Nullable String md5) throws IOE objMeta.setContentMD5(md5); } PutObjectRequest request = new PutObjectRequest(mBucketName, key, file, objMeta); - getClient().putObject(request); + mContentHash = getClient().putObject(request).getETag(); } catch (OSSException | ClientException e) { throw new IOException(e); } } + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } + protected OSS getClient() { return mClient; } diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSOutputStream.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSOutputStream.java index e37632d25e93..d3213539483e 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSOutputStream.java +++ 
b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSOutputStream.java @@ -11,6 +11,7 @@ package alluxio.underfs.oss; +import alluxio.underfs.ContentHashable; import alluxio.util.CommonUtils; import alluxio.util.io.PathUtils; @@ -33,6 +34,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -42,7 +44,7 @@ * local disk and copied as a complete file when the {@link #close()} method is called. */ @NotThreadSafe -public final class OSSOutputStream extends OutputStream { +public final class OSSOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(OSSOutputStream.class); /** Bucket name of the Alluxio OSS bucket. */ @@ -62,6 +64,8 @@ public final class OSSOutputStream extends OutputStream { /** Flag to indicate this stream has been closed, to ensure close is only done once. */ private AtomicBoolean mClosed = new AtomicBoolean(false); + private String mContentHash; + /** * Creates a name instance of {@link OSSOutputStream}. 
* @@ -155,7 +159,7 @@ public void close() throws IOException { byte[] hashBytes = mHash.digest(); objMeta.setContentMD5(new String(Base64.encodeBase64(hashBytes))); } - mOssClient.putObject(mBucketName, mKey, in, objMeta); + mContentHash = mOssClient.putObject(mBucketName, mKey, in, objMeta).getETag(); } catch (ServiceException e) { LOG.error("Failed to upload {}.", mKey); throw new IOException(e); @@ -167,4 +171,9 @@ public void close() throws IOException { } } } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java index 469f52b86401..95aab3ed7180 100644 --- a/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java +++ b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSLowLevelOutputStreamTest.java @@ -11,6 +11,8 @@ package alluxio.underfs.oss; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.never; @@ -30,6 +32,7 @@ import com.aliyun.oss.model.ObjectMetadata; import com.aliyun.oss.model.PartETag; import com.aliyun.oss.model.PutObjectRequest; +import com.aliyun.oss.model.PutObjectResult; import com.aliyun.oss.model.UploadPartRequest; import com.aliyun.oss.model.UploadPartResult; import com.google.common.util.concurrent.ListenableFuture; @@ -94,6 +97,8 @@ public void writeByte() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOssClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -111,6 +116,8 @@ public void writeByteArrayForSmallFile() throws 
Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOssClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -130,6 +137,8 @@ public void writeByteArrayForLargeFile() throws Exception { Assert.assertEquals(mStream.getPartNumber(), 3); Mockito.verify(mMockOssClient) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -142,6 +151,8 @@ public void createEmptyFile() throws Exception { .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); Mockito.verify(mMockOssClient).putObject(eq(BUCKET_NAME), eq(KEY), any(InputStream.class), any(ObjectMetadata.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("emptyTag", mStream.getContentHash().get()); } @Test @@ -163,6 +174,8 @@ public void flush() throws Exception { mStream.close(); Mockito.verify(mMockOssClient) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -172,6 +185,8 @@ public void close() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOssClient, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("emptyTag", mStream.getContentHash().get()); } /** @@ -193,8 +208,19 @@ private void mockOSSClientAndExecutor() throws Exception { return uploadResult; }); + PutObjectResult putResult = new PutObjectResult(); + putResult.setETag("putTag"); + when(mMockOssClient.putObject(any(PutObjectRequest.class))).thenReturn(putResult); + + PutObjectResult emptyPutResult = new 
PutObjectResult(); + emptyPutResult.setETag("emptyTag"); + when(mMockOssClient.putObject(any(String.class), any(String.class), any(InputStream.class), + any(ObjectMetadata.class))).thenReturn(emptyPutResult); + + CompleteMultipartUploadResult multiPartResult = new CompleteMultipartUploadResult(); + multiPartResult.setETag("multiTag"); when(mMockOssClient.completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) - .thenReturn(new CompleteMultipartUploadResult()); + .thenReturn(multiPartResult); mMockTag = (ListenableFuture) PowerMockito.mock(ListenableFuture.class); when(mMockTag.get()).thenReturn(new PartETag(1, "someTag")); diff --git a/underfs/oss/src/test/java/alluxio/underfs/oss/OSSOutputStreamTest.java b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSOutputStreamTest.java index de8edecb68c7..5353b4bf21a8 100644 --- a/underfs/oss/src/test/java/alluxio/underfs/oss/OSSOutputStreamTest.java +++ b/underfs/oss/src/test/java/alluxio/underfs/oss/OSSOutputStreamTest.java @@ -11,6 +11,9 @@ package alluxio.underfs.oss; +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; + import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -18,6 +21,7 @@ import com.aliyun.oss.OSS; import com.aliyun.oss.OSSException; import com.aliyun.oss.model.ObjectMetadata; +import com.aliyun.oss.model.PutObjectResult; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -47,6 +51,7 @@ public class OSSOutputStreamTest { private File mFile; private BufferedOutputStream mLocalOutputStream; private static AlluxioConfiguration sConf = Configuration.global(); + private final String mEtag = "someTag"; /** * The exception expected to be thrown. 
@@ -60,6 +65,10 @@ public class OSSOutputStreamTest { @Before public void before() throws Exception { mOssClient = Mockito.mock(OSS.class); + PutObjectResult result = Mockito.mock(PutObjectResult.class); + Mockito.when(result.getETag()).thenReturn(mEtag); + Mockito.when(mOssClient.putObject(any(), any(), any(InputStream.class), any())) + .thenReturn(result); mFile = Mockito.mock(File.class); mLocalOutputStream = Mockito.mock(BufferedOutputStream.class); } @@ -87,13 +96,14 @@ public void testConstructor() throws Exception { @PrepareForTest(OSSOutputStream.class) public void testWrite1() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, sConf.getList(PropertyKey.TMP_DIRS)); stream.write(1); stream.close(); + assertEquals(mEtag, stream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(1); } @@ -105,14 +115,15 @@ public void testWrite1() throws Exception { @PrepareForTest(OSSOutputStream.class) public void testWrite2() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, 
sConf.getList(PropertyKey.TMP_DIRS)); byte[] b = new byte[1]; stream.write(b, 0, 1); stream.close(); + assertEquals(mEtag, stream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(b, 0, 1); } @@ -123,14 +134,15 @@ public void testWrite2() throws Exception { @PrepareForTest(OSSOutputStream.class) public void testWrite3() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(FileOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(FileOutputStream.class)).thenReturn(mLocalOutputStream); OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, sConf .getList(PropertyKey.TMP_DIRS)); byte[] b = new byte[1]; stream.write(b); stream.close(); + assertEquals(mEtag, stream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(b, 0, 1); } @@ -145,16 +157,17 @@ public void testCloseError() throws Exception { String errorMessage = "Invoke the createEmptyObject method error."; BufferedInputStream inputStream = PowerMockito.mock(BufferedInputStream.class); PowerMockito.whenNew(BufferedInputStream.class) - .withArguments(Mockito.any(FileInputStream.class)).thenReturn(inputStream); + .withArguments(any(FileInputStream.class)).thenReturn(inputStream); PowerMockito .when(mOssClient.putObject(Mockito.anyString(), Mockito.anyString(), - Mockito.any(InputStream.class), Mockito.any(ObjectMetadata.class))) + any(InputStream.class), any(ObjectMetadata.class))) .thenThrow(new OSSException(errorMessage)); OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, sConf .getList(PropertyKey.TMP_DIRS)); mThrown.expect(IOException.class); mThrown.expectMessage(errorMessage); stream.close(); + assertEquals(mEtag, 
stream.getContentHash().get()); } /** @@ -172,6 +185,7 @@ public void testCloseSuccess() throws Exception { OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, sConf .getList(PropertyKey.TMP_DIRS)); stream.close(); + assertEquals(mEtag, stream.getContentHash().get()); Mockito.verify(mFile).delete(); } @@ -182,11 +196,12 @@ public void testCloseSuccess() throws Exception { @PrepareForTest(OSSOutputStream.class) public void testFlush() throws Exception { PowerMockito.whenNew(BufferedOutputStream.class) - .withArguments(Mockito.any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); + .withArguments(any(DigestOutputStream.class)).thenReturn(mLocalOutputStream); OSSOutputStream stream = new OSSOutputStream("testBucketName", "testKey", mOssClient, sConf .getList(PropertyKey.TMP_DIRS)); stream.flush(); stream.close(); + assertEquals(mEtag, stream.getContentHash().get()); Mockito.verify(mLocalOutputStream).flush(); } } diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java index 465e5e3738d4..d8b4fd48ddc9 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3ALowLevelOutputStream.java @@ -36,6 +36,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -56,6 +57,8 @@ public class S3ALowLevelOutputStream extends ObjectLowLevelOutputStream { /** The upload id of this multipart upload. */ protected volatile String mUploadId; + private String mContentHash; + /** * Constructs a new stream for writing a file. 
* @@ -127,8 +130,8 @@ protected void initMultiPartUploadInternal() throws IOException { protected void completeMultiPartUploadInternal() throws IOException { try { LOG.debug("complete multi part {}", mUploadId); - getClient().completeMultipartUpload(new CompleteMultipartUploadRequest( - mBucketName, mKey, mUploadId, mTags)); + mContentHash = getClient().completeMultipartUpload(new CompleteMultipartUploadRequest( + mBucketName, mKey, mUploadId, mTags)).getETag(); } catch (SdkClientException e) { LOG.debug("failed to complete multi part upload", e); throw new IOException( @@ -156,8 +159,9 @@ protected void createEmptyObject(String key) throws IOException { ObjectMetadata meta = new ObjectMetadata(); meta.setContentLength(0); meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); - getClient().putObject( - new PutObjectRequest(mBucketName, key, new ByteArrayInputStream(new byte[0]), meta)); + mContentHash = getClient().putObject( + new PutObjectRequest(mBucketName, key, new ByteArrayInputStream(new byte[0]), meta)) + .getETag(); } catch (SdkClientException e) { throw new IOException(e); } @@ -177,7 +181,7 @@ protected void putObject(String key, File file, @Nullable String md5) throws IOE meta.setContentType(Mimetypes.MIMETYPE_OCTET_STREAM); PutObjectRequest putReq = new PutObjectRequest(mBucketName, key, file); putReq.setMetadata(meta); - getClient().putObject(putReq); + mContentHash = getClient().putObject(putReq).getETag(); } catch (Exception e) { throw new IOException(e); } @@ -186,4 +190,9 @@ protected void putObject(String key, File file, @Nullable String md5) throws IOE protected AmazonS3 getClient() { return mClient; } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AOutputStream.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AOutputStream.java index 44dd9634de43..e32e1a519d02 100644 --- 
a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AOutputStream.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AOutputStream.java @@ -11,6 +11,7 @@ package alluxio.underfs.s3a; +import alluxio.underfs.ContentHashable; import alluxio.util.CommonUtils; import alluxio.util.io.PathUtils; @@ -32,6 +33,7 @@ import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Optional; import java.util.UUID; import javax.annotation.concurrent.NotThreadSafe; @@ -42,7 +44,7 @@ * multipart upload. */ @NotThreadSafe -public class S3AOutputStream extends OutputStream { +public class S3AOutputStream extends OutputStream implements ContentHashable { private static final Logger LOG = LoggerFactory.getLogger(S3AOutputStream.class); private final boolean mSseEnabled; @@ -76,6 +78,8 @@ public class S3AOutputStream extends OutputStream { /** The MD5 hash of the file. */ private MessageDigest mHash; + private String mContentHash; + /** * Constructs a new stream for writing a file. 
* @@ -147,7 +151,7 @@ public void close() throws IOException { // Generate the put request and wait for the transfer manager to complete the upload PutObjectRequest putReq = new PutObjectRequest(mBucketName, path, mFile).withMetadata(meta); - getTransferManager().upload(putReq).waitForUploadResult(); + mContentHash = getTransferManager().upload(putReq).waitForUploadResult().getETag(); } catch (Exception e) { LOG.error("Failed to upload {}", path, e); throw new IOException(e); @@ -175,4 +179,9 @@ protected String getUploadPath() { protected TransferManager getTransferManager() { return mManager; } + + @Override + public Optional getContentHash() { + return Optional.ofNullable(mContentHash); + } } diff --git a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java index 8412f6e6dda5..2a366d2b0b4c 100644 --- a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3ALowLevelOutputStreamTest.java @@ -11,6 +11,8 @@ package alluxio.underfs.s3a; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -28,11 +30,11 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.UploadPartRequest; import com.amazonaws.services.s3.model.UploadPartResult; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -92,6 +94,8 @@ public void writeByte() 
throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockS3Client, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -109,15 +113,17 @@ public void writeByteArrayForSmallFile() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockS3Client, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test public void writeByteArrayForLargeFile() throws Exception { int partSize = (int) FormatUtils.parseSpaceSize(PARTITION_SIZE); byte[] b = new byte[partSize + 1]; - Assert.assertEquals(mStream.getPartNumber(), 1); + assertEquals(mStream.getPartNumber(), 1); mStream.write(b, 0, b.length); - Assert.assertEquals(mStream.getPartNumber(), 2); + assertEquals(mStream.getPartNumber(), 2); Mockito.verify(mMockS3Client) .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockOutputStream).write(b, 0, b.length - 1); @@ -125,9 +131,11 @@ public void writeByteArrayForLargeFile() throws Exception { Mockito.verify(mMockExecutor).submit(any(Callable.class)); mStream.close(); - Assert.assertEquals(mStream.getPartNumber(), 3); + assertEquals(mStream.getPartNumber(), 3); Mockito.verify(mMockS3Client) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -139,6 +147,8 @@ public void createEmptyFile() throws Exception { Mockito.verify(mMockS3Client, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); Mockito.verify(mMockS3Client).putObject(any(PutObjectRequest.class)); + 
assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } @Test @@ -160,6 +170,8 @@ public void flush() throws Exception { mStream.close(); Mockito.verify(mMockS3Client) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("multiTag", mStream.getContentHash().get()); } @Test @@ -169,6 +181,8 @@ public void close() throws Exception { .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); Mockito.verify(mMockS3Client, never()) .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); + assertTrue(mStream.getContentHash().isPresent()); + assertEquals("putTag", mStream.getContentHash().get()); } /** @@ -190,8 +204,14 @@ private void mockS3ClientAndExecutor() throws Exception { return uploadResult; }); + CompleteMultipartUploadResult result = new CompleteMultipartUploadResult(); + result.setETag("multiTag"); when(mMockS3Client.completeMultipartUpload(any(CompleteMultipartUploadRequest.class))) - .thenReturn(new CompleteMultipartUploadResult()); + .thenReturn(result); + + PutObjectResult putResult = new PutObjectResult(); + putResult.setETag("putTag"); + when(mMockS3Client.putObject(any(PutObjectRequest.class))).thenReturn(putResult); mMockTag = (ListenableFuture) PowerMockito.mock(ListenableFuture.class); when(mMockTag.get()).thenReturn(new PartETag(1, "someTag")); diff --git a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AOutputStreamTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AOutputStreamTest.java index 93d368ea669a..43dccc5f1405 100644 --- a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AOutputStreamTest.java +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AOutputStreamTest.java @@ -11,6 +11,9 @@ package alluxio.underfs.s3a; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + import alluxio.conf.AlluxioConfiguration; import 
alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -18,6 +21,7 @@ import com.amazonaws.services.s3.model.PutObjectRequest; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.Upload; +import com.amazonaws.services.s3.transfer.model.UploadResult; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -44,6 +48,7 @@ public class S3AOutputStreamTest { private File mFile; private BufferedOutputStream mLocalOutputStream; private S3AOutputStream mStream; + private String mContentHash; /** * Sets the properties and configuration before each test runs. @@ -54,6 +59,10 @@ public void before() throws Exception { mLocalOutputStream = Mockito.mock(BufferedOutputStream.class); TransferManager manager = Mockito.mock(TransferManager.class); Upload result = Mockito.mock(Upload.class); + UploadResult uploadResult = Mockito.mock(UploadResult.class); + Mockito.doReturn(uploadResult).when(result).waitForUploadResult(); + mContentHash = "someHash"; + Mockito.doReturn(mContentHash).when(uploadResult).getETag(); Mockito.when(manager.upload(Mockito.any(PutObjectRequest.class))).thenReturn(result); PowerMockito.whenNew(BufferedOutputStream.class) @@ -64,6 +73,7 @@ public void before() throws Exception { mStream = new S3AOutputStream(BUCKET_NAME, KEY, manager, sConf.getList(PropertyKey.TMP_DIRS), sConf.getBoolean(PropertyKey.UNDERFS_S3_SERVER_SIDE_ENCRYPTION_ENABLED)); + assertFalse(mStream.getContentHash().isPresent()); } /** @@ -73,6 +83,7 @@ public void before() throws Exception { public void writeByte() throws Exception { mStream.write(1); mStream.close(); + assertEquals(mContentHash, mStream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(1); } @@ -84,6 +95,7 @@ public void writeByteArray() throws Exception { byte[] b = new byte[10]; mStream.write(b); mStream.close(); + assertEquals(mContentHash, mStream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(b, 0, 
b.length); } @@ -96,6 +108,7 @@ public void writeByteArrayWithRange() throws Exception { byte[] b = new byte[10]; mStream.write(b, 0, b.length); mStream.close(); + assertEquals(mContentHash, mStream.getContentHash().get()); Mockito.verify(mLocalOutputStream).write(b, 0, b.length); } @@ -105,6 +118,7 @@ public void writeByteArrayWithRange() throws Exception { @Test public void close() throws Exception { mStream.close(); + assertEquals(mContentHash, mStream.getContentHash().get()); Mockito.verify(mFile).delete(); } @@ -115,6 +129,7 @@ public void close() throws Exception { public void flush() throws Exception { mStream.flush(); mStream.close(); + assertEquals(mContentHash, mStream.getContentHash().get()); Mockito.verify(mLocalOutputStream).flush(); } } From 47320b67669360b69403842d4d912a5c977088d0 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Wed, 15 Feb 2023 16:20:44 -0800 Subject: [PATCH 117/334] Update retry policy for object stores Modern object stores ensure strong consistency as follows: Alibaba oss: https://www.alibabacloud.com/help/en/object-storage-service/latest/what-is-oss Amazon S3: https://aws.amazon.com/s3/consistency/ GCP Cloud Storage: https://cloud.google.com/storage/docs/consistency Huawei OBS: https://support.huaweicloud.com/intl/en-us/api-obs/obs_04_0118.html When this is non-zero and the UFS is strongly consistent, and Alluxio is out of sync, some operations may retry over and over even though nothing will change, slowing down the system for long periods of time. This value should be 0 by default with strong consistency as Alluxio will see the most up to date version the first time it accesses the object. 
pr-link: Alluxio/alluxio#16887 change-id: cid-164e455d71f4b17d797ddfbaff2ef7595c0bea90 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 6 ++++-- .../src/main/java/alluxio/retry/SleepingRetry.java | 2 +- .../java/alluxio/underfs/ObjectUnderFileSystemTest.java | 9 +++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 316e279c5a56..1697e58c5b41 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -1286,12 +1286,14 @@ public String toString() { .build(); public static final PropertyKey UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM = intBuilder(Name.UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM) - .setDefaultValue(20) + .setDefaultValue(0) .setDescription("To handle eventually consistent storage semantics " + "for certain under storages, Alluxio will perform retries " + "when under storage metadata doesn't match Alluxio's expectations. " + "These retries use exponential backoff. " - + "This property determines the maximum number of retries.") + + "This property determines the maximum number of retries. 
" + + "This property defaults to 0 as modern object store UFSs provide strong " + + "consistency.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); diff --git a/core/common/src/main/java/alluxio/retry/SleepingRetry.java b/core/common/src/main/java/alluxio/retry/SleepingRetry.java index 31eed7e0a91b..ec03d07dbedf 100644 --- a/core/common/src/main/java/alluxio/retry/SleepingRetry.java +++ b/core/common/src/main/java/alluxio/retry/SleepingRetry.java @@ -25,7 +25,7 @@ public abstract class SleepingRetry implements RetryPolicy { private int mAttemptCount = 0; protected SleepingRetry(int maxRetries) { - Preconditions.checkArgument(maxRetries > 0, "Max retries must be a positive number"); + Preconditions.checkArgument(maxRetries >= 0, "Max retries must be a non-negative number"); mMaxRetries = maxRetries; } diff --git a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java index b1351a0b58db..287abb1690f0 100644 --- a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java +++ b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java @@ -15,9 +15,13 @@ import static org.junit.Assert.fail; import alluxio.AlluxioURI; +import alluxio.ConfigurationRule; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import com.google.common.collect.ImmutableMap; +import org.junit.Rule; import org.junit.Test; import org.mockito.Mockito; @@ -28,6 +32,11 @@ public class ObjectUnderFileSystemTest { private static final AlluxioConfiguration CONF = Configuration.global(); + @Rule + public ConfigurationRule mConfigurationRule = new ConfigurationRule(ImmutableMap.of( + PropertyKey.UNDERFS_EVENTUAL_CONSISTENCY_RETRY_MAX_NUM, 20), + Configuration.modifiableGlobal()); + private ObjectUnderFileSystem mObjectUFS = new MockObjectUnderFileSystem(new AlluxioURI("/"), 
UnderFileSystemConfiguration.defaults(CONF)); From d30fddccd6d2c7d28b60cdc360ea0a2bdf7d6cbf Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 16 Feb 2023 11:25:33 +0800 Subject: [PATCH 118/334] Add embedded journal support for LocalAlluxioCluster ### What changes are proposed in this pull request? Add embedded journal support for LocalAlluxioCluster in integration tests ### Why are the changes needed? To test some features that only works with embedded journals ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16875 change-id: cid-218958b272917280916ef8bed0a26e0ce9bdd6d6 --- .../master/AbstractLocalAlluxioCluster.java | 4 + ...terEmbeddedJournalLocalAlluxioCluster.java | 317 ++++++++++++++++++ .../MultiMasterLocalAlluxioCluster.java | 9 + 3 files changed, 330 insertions(+) create mode 100644 minicluster/src/main/java/alluxio/master/MultiMasterEmbeddedJournalLocalAlluxioCluster.java diff --git a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java index 6299e8a51839..e37692de1e19 100644 --- a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java +++ b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java @@ -240,6 +240,10 @@ protected void setupTest() throws IOException { } } + formatJournal(); + } + + protected void formatJournal() throws IOException { // Formats the journal Format.format(Format.Mode.MASTER, Configuration.global()); } diff --git a/minicluster/src/main/java/alluxio/master/MultiMasterEmbeddedJournalLocalAlluxioCluster.java b/minicluster/src/main/java/alluxio/master/MultiMasterEmbeddedJournalLocalAlluxioCluster.java new file mode 100644 index 000000000000..deec3910d405 --- /dev/null +++ b/minicluster/src/main/java/alluxio/master/MultiMasterEmbeddedJournalLocalAlluxioCluster.java @@ -0,0 +1,317 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache 
License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master; + +import alluxio.AlluxioURI; +import alluxio.ConfigurationTestUtils; +import alluxio.client.file.FileSystem; +import alluxio.client.file.FileSystemContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.NodeState; +import alluxio.master.journal.JournalType; +import alluxio.multi.process.MasterNetAddress; +import alluxio.multi.process.PortCoordination; +import alluxio.util.CommonUtils; +import alluxio.util.WaitForOptions; +import alluxio.util.io.PathUtils; +import alluxio.util.network.NetworkAddressUtils; +import alluxio.worker.WorkerProcess; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeoutException; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * A local Alluxio cluster with multiple masters using embedded journal. + * Because the cluster run in a single process, a single configuration instance + * is shared across masters and workers. 
+ * If this causes issues, considering switching to {@link alluxio.multi.process.MultiProcessCluster} + */ +@NotThreadSafe +public final class MultiMasterEmbeddedJournalLocalAlluxioCluster + extends AbstractLocalAlluxioCluster { + private static final Logger LOG = LoggerFactory.getLogger( + MultiMasterEmbeddedJournalLocalAlluxioCluster.class); + + private int mNumOfMasters = 0; + + private final List mMasters = new ArrayList<>(); + private final List mPorts; + + private final List mMasterAddresses; + private final List mJournalFolders = new ArrayList<>(); + + /** + * @param numMasters the number of masters to run + * @param numWorkers the number of workers to run + * @param reservedPorts reserved ports + */ + public MultiMasterEmbeddedJournalLocalAlluxioCluster( + int numMasters, int numWorkers, List reservedPorts) + throws IOException { + super(numWorkers); + mNumOfMasters = numMasters; + mPorts = new ArrayList<>(reservedPorts); + mMasterAddresses = generateMasterAddresses(numMasters); + } + + private List generateMasterAddresses(int numMasters) throws IOException { + int timeout = (int) Configuration.getMs(PropertyKey.NETWORK_HOST_RESOLUTION_TIMEOUT_MS); + List addrs = new ArrayList<>(); + for (int i = 0; i < numMasters; i++) { + addrs.add(new MasterNetAddress( + NetworkAddressUtils.getLocalHostName(timeout), getNewPort(), getNewPort(), getNewPort())); + } + return addrs; + } + + private int getNewPort() throws IOException { + Preconditions.checkState(!mPorts.isEmpty(), "Out of ports to reserve"); + return mPorts.remove(mPorts.size() - 1).getPort(); + } + + @Override + public void initConfiguration(String name) throws IOException { + setAlluxioWorkDirectory(name); + setHostname(); + for (Map.Entry entry : ConfigurationTestUtils + .testConfigurationDefaults(Configuration.global(), + mHostname, mWorkDirectory).entrySet()) { + Configuration.set(entry.getKey(), entry.getValue()); + } + Configuration.set(PropertyKey.TEST_MODE, true); + 
Configuration.set(PropertyKey.JOB_WORKER_THROTTLING, false); + Configuration.set(PropertyKey.PROXY_WEB_PORT, 0); + Configuration.set(PropertyKey.WORKER_RPC_PORT, 0); + Configuration.set(PropertyKey.WORKER_WEB_PORT, 0); + + List journalAddresses = new ArrayList<>(); + List rpcAddresses = new ArrayList<>(); + for (MasterNetAddress address : mMasterAddresses) { + journalAddresses + .add(String.format("%s:%d", address.getHostname(), address.getEmbeddedJournalPort())); + rpcAddresses.add(String.format("%s:%d", address.getHostname(), address.getRpcPort())); + } + Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_ADDRESSES, + com.google.common.base.Joiner.on(",").join(journalAddresses)); + Configuration.set(PropertyKey.MASTER_RPC_ADDRESSES, + com.google.common.base.Joiner.on(",").join(rpcAddresses)); + } + + @Override + public synchronized FileSystem getClient() throws IOException { + return getLocalAlluxioMaster().getClient(); + } + + @Override + public FileSystem getClient(FileSystemContext context) throws IOException { + return getLocalAlluxioMaster().getClient(context); + } + + @Override + public LocalAlluxioMaster getLocalAlluxioMaster() { + for (LocalAlluxioMaster master : mMasters) { + // Return the leader master, if possible. 
+ if (master.isServing() + && master.getMasterProcess().mLeaderSelector.getState() == NodeState.PRIMARY) { + return master; + } + } + return mMasters.get(0); + } + + /** + * @param index the index + * @return the local alluxio master + */ + public LocalAlluxioMaster getLocalAlluxioMasterByIndex(int index) { + return mMasters.get(index); + } + + /** + * @param index the index + * @return the worker process by index + */ + public WorkerProcess getWorkerProcess(int index) { + return mWorkers.get(index); + } + + /** + * @return index of leader master in {@link #mMasters}, or -1 if there is no leader temporarily + */ + public int getLeaderIndex() { + for (int i = 0; i < mNumOfMasters; i++) { + if (mMasters.get(i).isServing() + && mMasters.get(i).getMasterProcess().mLeaderSelector.getState() == NodeState.PRIMARY) { + return i; + } + } + return -1; + } + + /** + * @return the master addresses + */ + public List getMasterAddresses() { + List addrs = new ArrayList<>(); + for (int i = 0; i < mNumOfMasters; i++) { + addrs.add(mMasters.get(i).getAddress()); + } + return addrs; + } + + /** + * Iterates over the masters in the order of master creation, stops the first standby master. + * + * @return true if a standby master is successfully stopped, otherwise, false + */ + public boolean stopStandby() { + for (int k = 0; k < mNumOfMasters; k++) { + if (!mMasters.get(k).isServing()) { + try { + LOG.info("master {} is a standby. stopping it...", k); + mMasters.get(k).stop(); + LOG.info("master {} stopped.", k); + } catch (Exception e) { + LOG.error(e.getMessage(), e); + return false; + } + return true; + } + } + return false; + } + + /** + * Iterates over the masters in the order of master creation, stops the leader master. + * + * @return true if the leader master is successfully stopped, false otherwise + */ + public boolean stopLeader() { + int leaderId = getLeaderIndex(); + try { + LOG.info("master {} is the leader. 
stopping it...", leaderId); + getLocalAlluxioMasterByIndex(leaderId).stop(); + LOG.info("master {} stopped.", leaderId); + } catch (Exception e) { + LOG.error(e.getMessage(), e); + return false; + } + return true; + } + + /** + * Waits for the primary master to start until a timeout occurs. + * + * @param timeoutMs the number of milliseconds to wait before giving up and throwing an exception + */ + public void waitForPrimaryMasterServing(int timeoutMs) + throws TimeoutException, InterruptedException { + CommonUtils.waitFor("the primary leader master to start", + () -> { + int leaderId = getLeaderIndex(); + if (leaderId == -1) { + return false; + } + try { + getLocalAlluxioMasterByIndex(leaderId).getClient().listStatus( + new AlluxioURI("/")); + return true; + } catch (Exception e) { + return false; + } + }, + WaitForOptions.defaults().setTimeoutMs(timeoutMs)); + } + + @Override + protected void startMasters() throws IOException { + // Because all masters run in the same process, they share the same configuration. + // Whenever we start a master, we modify these configurations to its dedicated ones. + // Masters are started one by one so that each can read the correct configurations. + // These configurations are mostly ports and will only be used when the master starts. + // However, if unluckily some places read these configurations during runtime, + // it might cause incorrect behaviors or errors because these configurations might be + // overridden by the late coming masters. + // If this happens, considering switching to MultiProcessCluster. + // Also, please do not rely on these configurations in your test cases + // because these configurations essentially reflect the configurations of + // the last master we started. 
+ for (int k = 0; k < mNumOfMasters; k++) { + Configuration.set(PropertyKey.MASTER_METASTORE_DIR, + PathUtils.concatPath(mWorkDirectory, "metastore-" + k)); + MasterNetAddress address = mMasterAddresses.get(k); + Configuration.set(PropertyKey.LOGGER_TYPE, "MASTER_LOGGER"); + Configuration.set(PropertyKey.MASTER_HOSTNAME, address.getHostname()); + Configuration.set(PropertyKey.MASTER_RPC_PORT, address.getRpcPort()); + Configuration.set(PropertyKey.MASTER_WEB_PORT, address.getWebPort()); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_PORT, + address.getEmbeddedJournalPort()); + Configuration.set(PropertyKey.MASTER_JOURNAL_FOLDER, mJournalFolders.get(k)); + + final LocalAlluxioMaster master = LocalAlluxioMaster.create(mWorkDirectory, false); + master.start(); + LOG.info("master NO.{} started, isServing: {}, address: {}", k, master.isServing(), + master.getAddress()); + mMasters.add(master); + } + + LOG.info("all {} masters started.", mNumOfMasters); + LOG.info("waiting for a leader."); + try { + waitForMasterServing(); + } catch (Exception e) { + throw new IOException(e); + } + // Use first master port + Configuration.set(PropertyKey.MASTER_RPC_PORT, + getLocalAlluxioMaster().getRpcLocalPort()); + } + + @Override + public void startWorkers() throws Exception { + super.startWorkers(); + } + + @Override + public void stopFS() throws Exception { + super.stopFS(); + } + + @Override + public void stopMasters() throws Exception { + for (int k = 0; k < mNumOfMasters; k++) { + mMasters.get(k).stop(); + } + } + + @Override + protected void formatJournal() { + for (int i = 0; i < mNumOfMasters; ++i) { + String extension = "-" + i; + File journalDir = new File(mWorkDirectory, "journal" + extension); + journalDir.mkdirs(); + mJournalFolders.add(journalDir.getAbsolutePath()); + } + } +} diff --git a/minicluster/src/main/java/alluxio/master/MultiMasterLocalAlluxioCluster.java b/minicluster/src/main/java/alluxio/master/MultiMasterLocalAlluxioCluster.java index 
adba0d25ca7d..72740fd58fbf 100644 --- a/minicluster/src/main/java/alluxio/master/MultiMasterLocalAlluxioCluster.java +++ b/minicluster/src/main/java/alluxio/master/MultiMasterLocalAlluxioCluster.java @@ -24,6 +24,7 @@ import alluxio.util.CommonUtils; import alluxio.util.WaitForOptions; import alluxio.util.io.PathUtils; +import alluxio.worker.WorkerProcess; import com.google.common.base.Throwables; import org.apache.curator.test.TestingServer; @@ -123,6 +124,14 @@ public LocalAlluxioMaster getLocalAlluxioMaster() { return mMasters.get(0); } + /** + * @param index the worker index + * @return the worker process + */ + public WorkerProcess getWorkerProcess(int index) { + return mWorkers.get(index); + } + /** * @return index of leader master in {@link #mMasters}, or -1 if there is no leader temporarily */ From da6abf7ec50e398ac65e15dbbf084a3e61341f86 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 16 Feb 2023 12:52:58 +0800 Subject: [PATCH 119/334] Add allowOnStandbyMasters option for version grpc endpoint ### What changes are proposed in this pull request? Add a rejectOnStandbyMasters on version grpc endpoint ### Why are the changes needed? We added a PR to make standby master return unavailable on this version service endpoint https://github.com/Alluxio/alluxio/pull/16854 However, in addition to the polling master inquire client, the AbstractMasterClient also needs this endpoint. https://github.com/Alluxio/alluxio/blob/master/core/common/src/main/java/alluxio/AbstractClient.java#L172 When a client makes a request to standby master, such check will constantly fail and resulted in failures. So we want the logic done in #16854 only applies to PollingMasterInquireClient and hence we added this boolean field to bypass the check. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16890 change-id: cid-379f8af78250d2a230bceff9d7ea75739979e198 --- .../src/main/java/alluxio/AbstractClient.java | 5 ++++- .../ServiceVersionClientServiceHandler.java | 3 ++- .../src/main/proto/grpc/version.proto | 21 +++++++++++++++++++ core/transport/src/main/proto/proto.lock | 5 +++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/core/common/src/main/java/alluxio/AbstractClient.java b/core/common/src/main/java/alluxio/AbstractClient.java index 936318346402..7f340ba6a87a 100644 --- a/core/common/src/main/java/alluxio/AbstractClient.java +++ b/core/common/src/main/java/alluxio/AbstractClient.java @@ -139,7 +139,10 @@ protected long getRemoteServiceVersion() throws AlluxioStatusException { try { return mVersionService .getServiceVersion( - GetServiceVersionPRequest.newBuilder().setServiceType(getRemoteServiceType()).build()) + GetServiceVersionPRequest.newBuilder() + .setServiceType(getRemoteServiceType()) + .setAllowedOnStandbyMasters(true) + .build()) .getVersion(); } catch (Throwable t) { throw AlluxioStatusException.fromThrowable(t); diff --git a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index d48b1d294cd9..a795266c1cd4 100644 --- a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -51,7 +51,8 @@ public ServiceVersionClientServiceHandler( @SuppressFBWarnings(value = "DB_DUPLICATE_SWITCH_CLAUSES") public void getServiceVersion(GetServiceVersionPRequest request, StreamObserver responseObserver) { - if (mStandbyRpcEnabled + // getAllowedOnStandbyMasters() is defaulted to false + if (!request.getAllowedOnStandbyMasters() && mStandbyRpcEnabled && mNodeStateSupplier != null && mNodeStateSupplier.get() == NodeState.STANDBY) { responseObserver.onError(Status.UNAVAILABLE 
.withDescription("GetServiceVersion is not supported on standby master") diff --git a/core/transport/src/main/proto/grpc/version.proto b/core/transport/src/main/proto/grpc/version.proto index 8126353ecd9e..3d5011cff33a 100644 --- a/core/transport/src/main/proto/grpc/version.proto +++ b/core/transport/src/main/proto/grpc/version.proto @@ -34,6 +34,27 @@ enum ServiceType { message GetServiceVersionPRequest { optional ServiceType serviceType = 1; + // The purpose of this field is to make grpc service on standby masters work without + // making client changes and keeps backwards compatibility. + // This requests to this endpoint will be rejected on standby masters by default, + // unless this field is set. + // Two places use this request: + // 1. PollingMasterInquireClient uses this endpoint to tell who is the primary master. + // 2. AbstractClient uses this endpoint to verify the version before it RPCs with the master. + // + // Behaviors: + // 1. old clients -> new cluster standby masters + // PollingMasterInquireClient does not set this field and is able to tell which one is primary master because + // the request will be rejected on the standby master. + // AbstractClient does not set this field. + // Old clients only connects to primary so this doesn't break the existing behavior. + // + // 2. new clients -> new cluster standby masters + // PollingMasterInquireClient does not set this field and is able to tell which one is primary master because + // the request will be rejected on the standby master. + // AbstractClient sets this field to true. Rpcs to standby masters can go through and pass the version verification. 
+ + optional bool allowedOnStandbyMasters = 2; } message GetServiceVersionPResponse { optional int64 version = 1; diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 7564f323b38a..e3732fb4e21c 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -8220,6 +8220,11 @@ "id": 1, "name": "serviceType", "type": "ServiceType" + }, + { + "id": 2, + "name": "allowedOnStandbyMasters", + "type": "bool" } ] }, From 6f101b1a20f1a4dee7180d44d5b11ff130a2c8ac Mon Sep 17 00:00:00 2001 From: bingzheng Date: Thu, 16 Feb 2023 17:09:26 +0800 Subject: [PATCH 120/334] [SMALLFIX] Add a missing javadoc param add a missing javadoc `@param` pr-link: Alluxio/alluxio#16891 change-id: cid-fa5c6ba2f4315994e8f5167a482c1047210711a7 --- core/client/fs/src/main/java/alluxio/client/file/FileSystem.java | 1 + 1 file changed, 1 insertion(+) diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index c6c0a05eb705..e57e7bc702e2 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -159,6 +159,7 @@ public static FileSystem create(FileSystemContext context) { /** * @param context the FileSystemContext to use with the FileSystem + * @param options the options associate with the FileSystem * @return a new FileSystem instance */ public static FileSystem create(FileSystemContext context, FileSystemOptions options) { From 7aa8b6c68a3c8a71c4c881adb6c10fa0a365bdc8 Mon Sep 17 00:00:00 2001 From: ssyssy Date: Thu, 16 Feb 2023 13:35:22 -0800 Subject: [PATCH 121/334] Change default TTL action from DELETE to DELETE_ALLUXIO ### What changes are proposed in this pull request? This change makes ttl default action be free. And some changes to make tests passed. ### Why are the changes needed? 
Fix #12316 ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#16823 change-id: cid-142490712d94004a0303f18399d4637e12d81523 --- .../file/options/OutStreamOptionsTest.java | 2 +- .../main/java/alluxio/conf/PropertyKey.java | 4 +- .../java/alluxio/master/ProtobufUtils.java | 8 +- .../alluxio/master/file/InodeTtlChecker.java | 15 +- .../master/file/FileSystemMasterTest.java | 6 +- .../src/main/proto/grpc/common.proto | 1 + core/transport/src/main/proto/proto.lock | 8 ++ .../src/main/proto/proto/journal/file.proto | 1 + .../fs/FileSystemMasterIntegrationTest.java | 5 +- .../alluxio/client/fs/TtlIntegrationTest.java | 129 ++++++++++++++++-- 10 files changed, 160 insertions(+), 19 deletions(-) diff --git a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java index 65e27b5d43e2..839de3facc89 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java @@ -102,7 +102,7 @@ public void defaults() throws IOException { assertEquals(ModeUtils.applyFileUMask(Mode.defaults(), mConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK)), options.getMode()); assertEquals(Constants.NO_TTL, options.getCommonOptions().getTtl()); - assertEquals(TtlAction.DELETE, options.getCommonOptions().getTtlAction()); + assertEquals(TtlAction.DELETE_ALLUXIO, options.getCommonOptions().getTtlAction()); assertEquals(ufsType, options.getUnderStorageType()); assertEquals(WriteType.CACHE_THROUGH, options.getWriteType()); assertEquals(Constants.LAST_TIER, options.getWriteTier()); diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 1697e58c5b41..94d9731c010a 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ 
b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5691,9 +5691,9 @@ public String toString() { .build(); public static final PropertyKey USER_FILE_CREATE_TTL_ACTION = enumBuilder(Name.USER_FILE_CREATE_TTL_ACTION, TtlAction.class) - .setDefaultValue(TtlAction.DELETE) + .setDefaultValue(TtlAction.DELETE_ALLUXIO) .setDescription("When file's ttl is expired, the action performs on it. Options: " - + "DELETE (default) or FREE") + + "DELETE_ALLUXIO(default), FREE or DELETE") .setScope(Scope.CLIENT) .build(); public static final PropertyKey USER_FILE_UFS_TIER_ENABLED = diff --git a/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java b/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java index d5453976c4a7..82897a147d58 100644 --- a/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java +++ b/core/server/master/src/main/java/alluxio/master/ProtobufUtils.java @@ -32,9 +32,11 @@ private ProtobufUtils() {} // prevent instantiation */ public static TtlAction fromProtobuf(PTtlAction pTtlAction) { if (pTtlAction == null) { - return TtlAction.DELETE; + return TtlAction.DELETE_ALLUXIO; } switch (pTtlAction) { + case DELETE_ALLUXIO: + return TtlAction.DELETE_ALLUXIO; case DELETE: return TtlAction.DELETE; case FREE: @@ -52,9 +54,11 @@ public static TtlAction fromProtobuf(PTtlAction pTtlAction) { */ public static PTtlAction toProtobuf(TtlAction ttlAction) { if (ttlAction == null) { - return PTtlAction.DELETE; + return PTtlAction.DELETE_ALLUXIO; } switch (ttlAction) { + case DELETE_ALLUXIO: + return PTtlAction.DELETE_ALLUXIO; case DELETE: return PTtlAction.DELETE; case FREE: diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java index 76796c39b1fa..c1f467b21848 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java 
@@ -104,7 +104,7 @@ public void heartbeat() throws InterruptedException { } mTtlBuckets.remove(inode); break; - case DELETE:// Default if not set is DELETE + case DELETE: // public delete method will lock the path, and check WRITE permission required at // parent of file if (inode.isDirectory()) { @@ -114,6 +114,19 @@ public void heartbeat() throws InterruptedException { mFileSystemMaster.delete(path, DeleteContext.defaults()); } break; + case DELETE_ALLUXIO: // Default: DELETE_ALLUXIO + // public delete method will lock the path, and check WRITE permission required at + // parent of file + if (inode.isDirectory()) { + mFileSystemMaster.delete(path, + DeleteContext.mergeFrom(DeletePOptions.newBuilder() + .setRecursive(true).setAlluxioOnly(true))); + } else { + mFileSystemMaster.delete(path, + DeleteContext.mergeFrom(DeletePOptions.newBuilder() + .setAlluxioOnly(true))); + } + break; default: LOG.error("Unknown ttl action {}", ttlAction); } diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java index 40278a8d5fdb..4ac3bef41ae1 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java @@ -791,7 +791,7 @@ public void setTtlForDirectoryWithNoTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file and directory should have been deleted during last TTL check. + // TTL is set to 0, the file should have been deleted during last TTL check. 
mThrown.expect(FileDoesNotExistException.class); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT); @@ -817,7 +817,7 @@ public void setSmallerTtlForFileWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is reset to 0, the file should have been deleted during last TTL check. + // TTL is set to 0, the file should have been deleted during last TTL check. mThrown.expect(FileDoesNotExistException.class); mFileSystemMaster.getFileInfo(fileId); } @@ -840,7 +840,7 @@ public void setSmallerTtlForDirectoryWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is reset to 0, the file should have been deleted during last TTL check. + // TTL is set to 0, the file should have been deleted during last TTL check. mThrown.expect(FileDoesNotExistException.class); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); } diff --git a/core/transport/src/main/proto/grpc/common.proto b/core/transport/src/main/proto/grpc/common.proto index de020d5063ae..94bf1db6f8e5 100644 --- a/core/transport/src/main/proto/grpc/common.proto +++ b/core/transport/src/main/proto/grpc/common.proto @@ -89,6 +89,7 @@ message ConfigProperty { enum TtlAction { DELETE = 0; // Delete the file after TTL expires. FREE = 1; // Free the file after TTL expires. + DELETE_ALLUXIO = 2; // Delete the data and metadata in Alluxio after TTL expires. 
} message Command { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index e3732fb4e21c..ad504e11a0dc 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -1570,6 +1570,10 @@ { "name": "FREE", "integer": 1 + }, + { + "name": "DELETE_ALLUXIO", + "integer": 2 } ] }, @@ -8604,6 +8608,10 @@ { "name": "FREE", "integer": 1 + }, + { + "name": "DELETE_ALLUXIO", + "integer": 2 } ] }, diff --git a/core/transport/src/main/proto/proto/journal/file.proto b/core/transport/src/main/proto/proto/journal/file.proto index c59840a2d2ec..6d5b789723a9 100644 --- a/core/transport/src/main/proto/proto/journal/file.proto +++ b/core/transport/src/main/proto/proto/journal/file.proto @@ -172,6 +172,7 @@ message InodeDirectoryIdGeneratorEntry { enum PTtlAction { DELETE = 0; FREE = 1; + DELETE_ALLUXIO = 2; } // next available id: 30 diff --git a/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java index aa44d986873c..9b8c1d33e9c6 100644 --- a/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java @@ -187,7 +187,7 @@ public void clientFileInfoEmptyFile() throws Exception { assertFalse(fileInfo.isPersisted()); assertFalse(fileInfo.isPinned()); Assert.assertEquals(Constants.NO_TTL, fileInfo.getTtl()); - Assert.assertEquals(TtlAction.DELETE, fileInfo.getTtlAction()); + Assert.assertEquals(TtlAction.DELETE_ALLUXIO, fileInfo.getTtlAction()); Assert.assertEquals(TEST_USER, fileInfo.getOwner()); Assert.assertEquals(0644, (short) fileInfo.getMode()); } @@ -658,7 +658,8 @@ public void ttlExpiredCreateFile() throws Exception { mFsMaster.createDirectory(new AlluxioURI("/testFolder"), CreateDirectoryContext.defaults()); long ttl = 1; CreateFileContext context = 
CreateFileContext.mergeFrom(CreateFilePOptions.newBuilder() - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl))); + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl))) + .setWriteType(WriteType.CACHE_THROUGH); long fileId = mFsMaster.createFile(new AlluxioURI("/testFolder/testFile1"), context).getFileId(); FileInfo folderInfo = diff --git a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java index 8ee0b29ea286..03b5935348c6 100644 --- a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java @@ -11,14 +11,20 @@ package alluxio.client.fs; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import alluxio.AlluxioTestDirectory; import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.file.FileOutStream; import alluxio.client.file.FileSystem; import alluxio.conf.PropertyKey; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.LoadMetadataPType; +import alluxio.grpc.TtlAction; import alluxio.grpc.WritePType; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatScheduler; @@ -31,6 +37,10 @@ import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.util.Arrays; /** * Integration tests for handling file TTLs (times to live). 
@@ -40,35 +50,52 @@ public class TtlIntegrationTest extends BaseIntegrationTest { private FileSystem mFileSystem; + private FileOutStream mOutStream = null; + + protected byte[] mBuffer; + @ClassRule public static ManuallyScheduleHeartbeat sManuallySchedule = new ManuallyScheduleHeartbeat(HeartbeatContext.MASTER_TTL_CHECK); + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + public File mUfs = AlluxioTestDirectory.createTemporaryDirectory("RootUfs"); + private String mLocalUfsPath = mUfs.getAbsolutePath(); + @Rule public LocalAlluxioClusterResource mLocalAlluxioClusterResource = new LocalAlluxioClusterResource.Builder() - .setProperty(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS, TTL_INTERVAL_MS).build(); + .setProperty(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, mLocalUfsPath) + .setProperty(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS, TTL_INTERVAL_MS) + .setProperty(PropertyKey.USER_FILE_METADATA_LOAD_TYPE, LoadMetadataPType.NEVER) + .build(); @Before - public void before() { + public void before() throws Exception { mFileSystem = FileSystem.Factory.create(); + mBuffer = new byte[10]; + Arrays.fill(mBuffer, (byte) 'A'); } /** - * Tests that when many TTLs expire at the same time, files are deleted properly. + * Tests that when many TTLs expire at the same time, files are deleted from alluxio properly. */ @Test - public void expireManyAfterDelete() throws Exception { + public void expireManyAfterDeleteAlluxio() throws Exception { int numFiles = 100; AlluxioURI[] files = new AlluxioURI[numFiles]; for (int i = 0; i < numFiles; i++) { files[i] = new AlluxioURI("/file" + i); // Only the even-index files should expire. long ttl = i % 2 == 0 ? 
TTL_INTERVAL_MS / 2 : TTL_INTERVAL_MS * 1000; - mFileSystem.createFile(files[i], - CreateFilePOptions.newBuilder().setWriteType(WritePType.THROUGH) - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl)).build()) - .close(); + mOutStream = mFileSystem.createFile(files[i], + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl)).build()); + mOutStream.write(mBuffer, 0, 10); + mOutStream.close(); + // Delete some of the even files to make sure this doesn't trip up the TTL checker. if (i % 20 == 0) { mFileSystem.delete(files[i]); @@ -79,6 +106,92 @@ public void expireManyAfterDelete() throws Exception { for (int i = 0; i < numFiles; i++) { if (i % 2 == 0) { assertFalse(mFileSystem.exists(files[i])); + // Check Ufs file existence + if (i % 20 != 0) { + String fileName = "file" + i; + // Check Ufs file existence + assertTrue(Arrays.stream(mUfs.list()).anyMatch(s -> s.equals(fileName))); + } + } else { + assertTrue(mFileSystem.exists(files[i])); + } + } + } + + /** + * Tests that when many TTLs expire at the same time, files are freed properly. + */ + @Test + public void expireManyAfterFree() throws Exception { + int numFiles = 100; + AlluxioURI[] files = new AlluxioURI[numFiles]; + for (int i = 0; i < numFiles; i++) { + files[i] = new AlluxioURI("/fileFree" + i); + // Only the even-index files should expire. + long ttl = i % 2 == 0 ? TTL_INTERVAL_MS / 2 : TTL_INTERVAL_MS * 1000; + mOutStream = mFileSystem.createFile(files[i], + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl) + .setTtlAction(TtlAction.FREE)).build()); + mOutStream.write(mBuffer, 0, 10); + mOutStream.close(); + + // Delete some of the even files to make sure this doesn't trip up the TTL checker. 
+ if (i % 20 == 0) { + mFileSystem.delete(files[i]); + } + } + CommonUtils.sleepMs(2 * TTL_INTERVAL_MS); + HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); + for (int i = 0; i < numFiles; i++) { + if (i % 2 == 0) { + if (i % 20 != 0) { + assertEquals(Constants.NO_TTL, mFileSystem.getStatus(files[i]).getTtl()); + assertEquals(TtlAction.DELETE, mFileSystem.getStatus(files[i]).getTtlAction()); + assertEquals(0, mFileSystem.getStatus(files[i]).getInMemoryPercentage()); + String fileName = "fileFree" + i; + // Check Ufs file existence + assertTrue(Arrays.stream(mUfs.list()).anyMatch(s -> s.equals(fileName))); + } + } else { + assertTrue(mFileSystem.exists(files[i])); + assertEquals(100, mFileSystem.getStatus(files[i]).getInMemoryPercentage()); + } + } + } + + /** + * Tests that when many TTLs expire at the same time, files are deleted from Alluxio and + * UFS properly. + */ + @Test + public void expireManyAfterDelete() throws Exception { + int numFiles = 100; + AlluxioURI[] files = new AlluxioURI[numFiles]; + for (int i = 0; i < numFiles; i++) { + files[i] = new AlluxioURI("/fileDelete" + i); + // Only the even-index files should expire. + long ttl = i % 2 == 0 ? TTL_INTERVAL_MS / 2 : TTL_INTERVAL_MS * 1000; + mOutStream = mFileSystem.createFile(files[i], + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl) + .setTtlAction(TtlAction.DELETE)).build()); + mOutStream.write(mBuffer, 0, 10); + mOutStream.close(); + + // Delete some of the even files to make sure this doesn't trip up the TTL checker. 
+ if (i % 20 == 0) { + mFileSystem.delete(files[i]); + } + } + CommonUtils.sleepMs(4 * TTL_INTERVAL_MS); + HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); + for (int i = 0; i < numFiles; i++) { + if (i % 2 == 0) { + assertFalse(mFileSystem.exists(files[i])); + String fileName = "fileDelete" + i; + // Check Ufs file existence + assertFalse(Arrays.stream(mUfs.list()).anyMatch(s -> s.equals(fileName))); } else { assertTrue(mFileSystem.exists(files[i])); } From ebeac49c3cfc9a224cf8772ebe1c3e38ae1a0b5a Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 17 Feb 2023 11:26:43 +0800 Subject: [PATCH 122/334] Make workers register to all masters ### What changes are proposed in this pull request? Make workers register to all masters 1. Enabled grpc block master service on standby masters 2. Introduce SpecificMasterBlockSync on workers to heartbeat all masters and BlockSyncMaster for its abstraction 3. Introduce add worker id rpc to reach consensus on worker ids across masters 4. Journal the block location change 5. Update the heartbeat report generation and make it more fault tolerant 6. All a basic worker registration integration test ### Why are the changes needed? To accelerate the master failover process ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16849 change-id: cid-fab5aaccada578a80c7cc149cfc675cc13d4e835 --- .../block/RetryHandlingBlockMasterClient.java | 27 ++ .../java/alluxio/AbstractMasterClient.java | 15 + .../main/java/alluxio/conf/PropertyKey.java | 30 +- .../main/java/alluxio/metrics/MetricKey.java | 5 + .../worker/block/BlockHeartbeatReport.java | 11 + .../alluxio/worker/block/BlockWorker.java | 6 + .../alluxio/master/block/BlockMaster.java | 7 + .../BlockMasterWorkerServiceHandler.java | 13 + .../master/block/DefaultBlockMaster.java | 150 +++++++- .../master/AlwaysPrimaryPrimarySelector.java | 63 +++ .../java/alluxio/master/MasterTestUtils.java | 38 +- .../alluxio/master/block/BlockMasterTest.java | 5 +- .../block/ConcurrentBlockMasterTest.java | 6 +- .../AllMasterRegistrationBlockWorker.java | 78 ++++ .../worker/block/BlockHeartbeatReporter.java | 82 +++- .../worker/block/BlockMasterClient.java | 30 ++ .../worker/block/BlockMasterClientPool.java | 39 +- .../worker/block/BlockSyncMasterGroup.java | 181 +++++++++ .../worker/block/BlockWorkerFactory.java | 17 +- .../worker/block/DefaultBlockWorker.java | 13 +- .../worker/block/SpecificMasterBlockSync.java | 292 ++++++++++++++ .../block/TestSpecificMasterBlockSync.java | 82 ++++ .../block/WorkerMasterRegistrationState.java | 21 + .../AllMasterRegistrationBlockWorkerTest.java | 75 ++++ .../block/BlockHeartbeatReporterTest.java | 82 +++- .../alluxio/worker/block/NoopBlockWorker.java | 5 + .../block/SpecificMasterBlockSyncTest.java | 248 ++++++++++++ .../src/main/proto/grpc/block_master.proto | 15 + core/transport/src/main/proto/proto.lock | 41 ++ .../src/main/proto/proto/journal/block.proto | 3 + .../multi/process/PortCoordination.java | 2 + ...ckMasterRegisterStreamIntegrationTest.java | 6 +- .../WorkerAllMasterRegistrationTest.java | 361 ++++++++++++++++++ 33 files changed, 2010 insertions(+), 39 deletions(-) create mode 100644 core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java create mode 
100644 core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java create mode 100644 core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java create mode 100644 core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java create mode 100644 core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java create mode 100644 core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java create mode 100644 core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java create mode 100644 core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java create mode 100644 tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java index 2824c36dcb63..e37da23a7a6f 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java @@ -26,6 +26,8 @@ import alluxio.grpc.ServiceType; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import alluxio.retry.RetryPolicy; import alluxio.wire.BlockInfo; import alluxio.wire.BlockMasterInfo; import alluxio.wire.BlockMasterInfo.BlockMasterInfoField; @@ -35,9 +37,11 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.List; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; @@ -59,6 +63,29 @@ public RetryHandlingBlockMasterClient(MasterClientContext conf) { 
super(conf); } + /** + * Creates a new block master client. + * + * @param conf master client configuration + * @param address the master address the client connects to + */ + public RetryHandlingBlockMasterClient(MasterClientContext conf, InetSocketAddress address) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address)); + } + + /** + * Creates a new block master client. + * + * @param conf master client configuration + * @param address the master address the client connects to + * @param retryPolicy retry policy to use + */ + public RetryHandlingBlockMasterClient( + MasterClientContext conf, InetSocketAddress address, + Supplier retryPolicy) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address), retryPolicy); + } + @Override protected ServiceType getRemoteServiceType() { return ServiceType.BLOCK_MASTER_CLIENT_SERVICE; diff --git a/core/common/src/main/java/alluxio/AbstractMasterClient.java b/core/common/src/main/java/alluxio/AbstractMasterClient.java index 9393980f57c0..555a0edea14d 100644 --- a/core/common/src/main/java/alluxio/AbstractMasterClient.java +++ b/core/common/src/main/java/alluxio/AbstractMasterClient.java @@ -68,6 +68,21 @@ public AbstractMasterClient( mMasterSelectionPolicy = MasterSelectionPolicy.Factory.primaryMaster(); } + /** + * Creates a new master client without a specific address. 
+ * @param clientConf master client configuration + * @param selectionPolicy master selection policy: which master the client should connect to + * @param retryPolicySupplier retry policy to use + */ + public AbstractMasterClient( + MasterClientContext clientConf, + MasterSelectionPolicy selectionPolicy, + Supplier retryPolicySupplier) { + super(clientConf, retryPolicySupplier); + mMasterInquireClient = clientConf.getMasterInquireClient(); + mMasterSelectionPolicy = selectionPolicy; + } + @Override public synchronized InetSocketAddress getConfAddress() throws UnavailableException { return mMasterSelectionPolicy.getPrimaryMasterAddressCached(mMasterInquireClient); diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 94d9731c010a..801e2df43e9f 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -4527,6 +4527,17 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.WORKER) .build(); + public static final PropertyKey WORKER_REGISTER_TO_ALL_MASTERS = + booleanBuilder(Name.WORKER_REGISTER_TO_ALL_MASTERS) + .setDefaultValue(false) + .setDescription("If enabled, workers will register themselves to all masters, " + + "instead of primary master only. This can be used to save the " + + "master failover time because the new primary immediately knows " + + "all existing workers and blocks. 
Can only be enabled when " + + Name.STANDBY_MASTER_GRPC_ENABLED + " is turned on.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.ALL) + .build(); public static final PropertyKey WORKER_REMOTE_IO_SLOW_THRESHOLD = durationBuilder(Name.WORKER_REMOTE_IO_SLOW_THRESHOLD) .setDefaultValue("10s") @@ -4546,7 +4557,20 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.WORKER) .build(); - + public static final PropertyKey WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD = + intBuilder(Name.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD) + .setDefaultValue(1_000_000) + .setDescription( + "When " + Name.WORKER_REGISTER_TO_ALL_MASTERS + "=true, " + + "because a worker will send block reports to all masters, " + + "we use a threshold to limit the unsent block report size in worker's memory. " + + "If the worker block heartbeat is larger than the threshold, " + + "we discard the heartbeat message and force " + + "the worker to register with that master with a full report." 
+ ) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.WORKER) + .build(); public static final PropertyKey WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED = booleanBuilder(Name.WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED) .setDefaultValue(true) @@ -8287,10 +8311,14 @@ public static final class Name { "alluxio.worker.register.stream.response.timeout"; public static final String WORKER_REGISTER_STREAM_COMPLETE_TIMEOUT = "alluxio.worker.register.stream.complete.timeout"; + public static final String WORKER_REGISTER_TO_ALL_MASTERS = + "alluxio.worker.register.to.all.masters"; public static final String WORKER_REMOTE_IO_SLOW_THRESHOLD = "alluxio.worker.remote.io.slow.threshold"; public static final String WORKER_BLOCK_MASTER_CLIENT_POOL_SIZE = "alluxio.worker.block.master.client.pool.size"; + public static final String WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD = + "alluxio.worker.block.heartbeat.report.size.threshold"; public static final String WORKER_PRINCIPAL = "alluxio.worker.principal"; public static final String WORKER_PAGE_STORE_ASYNC_RESTORE_ENABLED = "alluxio.worker.page.store.async.restore.enabled"; diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index 50a2f7db7cfa..db8ccb5f429d 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -2062,6 +2062,11 @@ public static String getSyncMetricName(long mountId) { + "Use this metric to monitor the RPC pressure on worker.") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey WORKER_MASTER_REGISTRATION_SUCCESS_COUNT = + new Builder("Worker.MasterRegistrationSuccessCount") + .setDescription("Total number of the succeed master registration.") + .setMetricType(MetricType.COUNTER) + .build(); // Client metrics public static final MetricKey CLIENT_BLOCK_READ_CHUNK_REMOTE = diff --git 
a/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java b/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java index 5fb02209f85f..32419475151e 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockHeartbeatReport.java @@ -74,4 +74,15 @@ public List getRemovedBlocks() { public Map> getLostStorage() { return Collections.unmodifiableMap(mLostStorage); } + + /** + * @return the number of blocks in the report + */ + public int getBlockChangeCount() { + int count = mRemovedBlocks.size(); + for (List blocks: mAddedBlocks.values()) { + count += blocks.size(); + } + return count; + } } diff --git a/core/common/src/main/java/alluxio/worker/block/BlockWorker.java b/core/common/src/main/java/alluxio/worker/block/BlockWorker.java index 7346691c68dc..8d65db1b0650 100644 --- a/core/common/src/main/java/alluxio/worker/block/BlockWorker.java +++ b/core/common/src/main/java/alluxio/worker/block/BlockWorker.java @@ -21,6 +21,7 @@ import alluxio.proto.dataserver.Protocol; import alluxio.wire.Configuration; import alluxio.wire.FileInfo; +import alluxio.wire.WorkerNetAddress; import alluxio.worker.SessionCleanable; import alluxio.worker.Worker; import alluxio.worker.block.io.BlockReader; @@ -237,4 +238,9 @@ BlockReader createUfsBlockReader(long sessionId, long blockId, long offset, bool * @return the block store */ BlockStore getBlockStore(); + + /** + * @return the worker address + */ + WorkerNetAddress getWorkerAddress(); } diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index daf9d1906f67..ce36c111427c 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -392,4 +392,11 @@ void workerRegisterStream( * @param workerId the workerId of target 
worker */ void removeDecommissionedWorker(long workerId) throws NotFoundException; + + /** + * Notify the worker id to a master. + * @param workerId the worker id + * @param workerNetAddress the worker address + */ + void notifyWorkerId(long workerId, WorkerNetAddress workerNetAddress); } diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java index c91c829da890..45f15da74213 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterWorkerServiceHandler.java @@ -28,6 +28,8 @@ import alluxio.grpc.GetWorkerIdPResponse; import alluxio.grpc.GrpcUtils; import alluxio.grpc.LocationBlockIdListEntry; +import alluxio.grpc.NotifyWorkerIdPRequest; +import alluxio.grpc.NotifyWorkerIdPResponse; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; @@ -218,4 +220,15 @@ static Map> reconstructBlocksOnLocationMap( + "with LocationBlockIdListEntry objects %s", workerId, entryReport)); })); } + + @Override + public void notifyWorkerId( + NotifyWorkerIdPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + mBlockMaster.notifyWorkerId(request.getWorkerId(), + GrpcUtils.fromProto(request.getWorkerNetAddress())); + return alluxio.grpc.NotifyWorkerIdPResponse.getDefaultInstance(); + }, "notifyWorkerId", "request=%s", responseObserver, request); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 99adf9859842..d6f093f5f5fa 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ 
-25,6 +25,7 @@ import alluxio.conf.PropertyKey; import alluxio.exception.BlockInfoException; import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.exception.status.InvalidArgumentException; import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; @@ -34,6 +35,7 @@ import alluxio.grpc.GetRegisterLeasePRequest; import alluxio.grpc.GrpcService; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.NodeState; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.ServiceType; @@ -67,6 +69,7 @@ import alluxio.util.CommonUtils; import alluxio.util.IdUtils; import alluxio.util.ThreadFactoryUtils; +import alluxio.util.WaitForOptions; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; import alluxio.util.network.NetworkAddressUtils; @@ -277,6 +280,14 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { private final RegisterLeaseManager mRegisterLeaseManager = new RegisterLeaseManager(); + private final HashMap mWorkerIdMap = new HashMap<>(); + + private final boolean mWorkerRegisterToAllMasters = Configuration.getBoolean( + PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); + + private final boolean mStandbyMasterRpcEnabled = Configuration.getBoolean( + PropertyKey.STANDBY_MASTER_GRPC_ENABLED); + /** * Creates a new instance of {@link DefaultBlockMaster}. 
* @@ -349,6 +360,16 @@ public Map getServices() { return services; } + @Override + public Map getStandbyServices() { + Map services = new HashMap<>(); + services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE, + new GrpcService(ServerInterceptors + .intercept(new BlockMasterWorkerServiceHandler(this), + new ClientContextServerInjector()))); + return services; + } + @Override public boolean processJournalEntry(JournalEntry entry) { // TODO(gene): A better way to process entries besides a huge switch? @@ -374,6 +395,28 @@ public boolean processJournalEntry(JournalEntry entry) { } mBlockMetaStore.putBlock(blockInfoEntry.getBlockId(), BlockMeta.newBuilder().setLength(blockInfoEntry.getLength()).build()); + // This can be called when + // 1. The master is replaying the journal. + // 2. A standby master is applying a journal entry from the primary master. + if (blockInfoEntry.hasBlockLocation()) { + alluxio.grpc.BlockLocation blockLocation = blockInfoEntry.getBlockLocation(); + long workerId = blockLocation.getWorkerId(); + MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (worker == null) { + // The master is replaying journal or somehow the worker is not there anymore + // We do not add the BlockLocation because the workerId is not reliable anymore + // If the worker comes back, it will register and BlockLocation will be added then + return true; + } + // The master is running and the journal is from an existing worker + mBlockMetaStore.addLocation(blockInfoEntry.getBlockId(), BlockLocation.newBuilder() + .setWorkerId(workerId) + .setTier(blockLocation.getTierAlias()) + .setMediumType(blockLocation.getMediumType()) + .build()); + worker.addBlock(blockInfoEntry.getBlockId()); + LOG.debug("Added BlockLocation for {} to worker {}", blockInfoEntry.getBlockId(), workerId); + } } else { return false; } @@ -480,7 +523,7 @@ public void close() { @Override public void start(Boolean isLeader) throws IOException { super.start(isLeader); - if (isLeader) { + 
if (isLeader || mWorkerRegisterToAllMasters) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), () -> Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL), @@ -920,9 +963,24 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, block.get().getLength(), length); } else { mBlockMetaStore.putBlock(blockId, BlockMeta.newBuilder().setLength(length).build()); - BlockInfoEntry blockInfo = - BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length).build(); - journalContext.append(JournalEntry.newBuilder().setBlockInfo(blockInfo).build()); + BlockInfoEntry.Builder blockInfoBuilder = + BlockInfoEntry.newBuilder().setBlockId(blockId).setLength(length); + if (mWorkerRegisterToAllMasters) { + blockInfoBuilder + .setBlockId(blockId) + .setLength(length) + .setBlockLocation( + alluxio.grpc.BlockLocation.newBuilder() + .setWorkerId(workerId) + .setMediumType(mediumType) + .setTierAlias(tierAlias) + // Worker addresses are not journaled because adding a block location + // into the meta store only needs a worker id. + .build() + ); + } + journalContext.append( + JournalEntry.newBuilder().setBlockInfo(blockInfoBuilder.build()).build()); } } // Update the block metadata with the new worker location. 
@@ -1076,6 +1134,10 @@ protected MasterWorkerInfo recordWorkerRegistration(long workerId) { @Override public long getWorkerId(WorkerNetAddress workerNetAddress) { + if (mStandbyMasterRpcEnabled && mPrimarySelector.getState() == NodeState.STANDBY) { + throw new UnavailableRuntimeException( + "GetWorkerId operation is not supported on standby masters"); + } LOG.info("Worker {} requesting for an ID", workerNetAddress); MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (existingWorker != null) { @@ -1095,11 +1157,32 @@ public long getWorkerId(WorkerNetAddress workerNetAddress) { while (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) { workerId = IdUtils.getRandomNonNegativeLong(); } - LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); return workerId; } + @Override + public void notifyWorkerId(long workerId, WorkerNetAddress workerNetAddress) { + MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (existingWorker != null) { + LOG.warn("A registered worker {} comes again from {}", + workerId, existingWorker.getWorkerAddress()); + return; + } + + existingWorker = findUnregisteredWorker(workerId); + if (existingWorker != null) { + LOG.warn("An unregistered worker {} comes again from {}", + workerId, existingWorker.getWorkerAddress()); + return; + } + + if (!mTempWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress))) { + throw new RuntimeException("Duplicated worker ID for " + workerId + ": " + workerNetAddress); + } + LOG.info("notifyWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); + } + @Override public Optional tryAcquireRegisterLease(GetRegisterLeasePRequest request) { return mRegisterLeaseManager.tryAcquireLease(request); @@ -1310,6 +1393,12 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT // by the LostWorkerDetectionHeartbeatExecutor worker.updateLastUpdatedTimeMs(); + if 
(mWorkerRegisterToAllMasters && mPrimarySelector.getState() == NodeState.STANDBY) { + waitBlockIdPresent( + addedBlocks.values().stream().flatMap(Collection::stream) + .collect(Collectors.toList()), workerId); + } + // The address is final, no need for locking processWorkerMetrics(worker.getWorkerAddress().getHost(), metrics); @@ -1330,7 +1419,7 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT processWorkerRemovedBlocks(worker, removedBlockIds, false); processWorkerAddedBlocks(worker, addedBlocks); Set toRemoveBlocks = worker.getToRemoveBlocks(); - if (toRemoveBlocks.isEmpty()) { + if (toRemoveBlocks.isEmpty() || mPrimarySelector.getState() == NodeState.STANDBY) { workerCommand = Command.newBuilder().setCommandType(CommandType.Nothing).build(); } else { workerCommand = Command.newBuilder().setCommandType(CommandType.Free) @@ -1347,6 +1436,47 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT return workerCommand; } + /** + * Waits for the block id being presents. + * If workers register to standby masters, when a block is created, + * heartbeats might come before the standby applies the journal. + * To prevent this, we wait as best efforts before ignore unknown block ids. 
+ */ + private void waitBlockIdPresent(Collection blockIds, long workerId) { + final List blockIdsToWait = new ArrayList<>(); + for (long addedBlockId : blockIds) { + if (!mBlockMetaStore.getBlock(addedBlockId).isPresent()) { + blockIdsToWait.add(addedBlockId); + } + } + try { + CommonUtils.waitFor( + "Wait for blocks being committed on master before adding block locations", + () -> { + for (long blockId: blockIdsToWait) { + if (!mBlockMetaStore.getBlock(blockId).isPresent()) { + return false; + } + } + return true; + }, + WaitForOptions.defaults().setInterval(200).setTimeoutMs(1000) + ); + } catch (InterruptedException | TimeoutException e) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (long blockIdToWait : blockIdsToWait) { + if (!mBlockMetaStore.getBlock(blockIdToWait).isPresent()) { + sb.append(blockIdToWait); + sb.append(" ,"); + } + } + sb.append("]"); + LOG.warn("Adding block ids {} for worker {} but these blocks don't exist. " + + "These blocks will be ignored", sb, workerId); + } + } + @Override public Clock getClock() { return mClock; @@ -1749,4 +1879,12 @@ public static void registerGauges(final DefaultBlockMaster master) { private Metrics() {} // prevent instantiation } + + /** + * @return the block meta store + */ + @VisibleForTesting + public BlockMetaStore getBlockMetaStore() { + return mBlockMetaStore; + } } diff --git a/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java b/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java new file mode 100644 index 000000000000..48a68603510d --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/AlwaysPrimaryPrimarySelector.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master; + +import alluxio.grpc.NodeState; +import alluxio.util.interfaces.Scoped; + +import java.net.InetSocketAddress; +import java.util.function.Consumer; + +/** + * A test primary selector which is always primary. + */ +public final class AlwaysPrimaryPrimarySelector implements PrimarySelector { + @Override + public void start(InetSocketAddress localAddress) { + // Nothing to do. + } + + @Override + public void stop() { + // Nothing to do. + } + + @Override + public NodeState getState() { + return NodeState.PRIMARY; + } + + @Override + public NodeState getStateUnsafe() { + return NodeState.PRIMARY; + } + + @Override + public Scoped onStateChange(Consumer listener) { + // State never changes. 
+ return () -> { }; + } + + @Override + public void waitForState(NodeState state) throws InterruptedException { + switch (state) { + case PRIMARY: + return; + case STANDBY: + // Never happening + Thread.sleep(Long.MAX_VALUE); + break; + default: + throw new IllegalStateException("Unknown primary selector state: " + state); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java b/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java index 552fc4f05292..2b843f368343 100644 --- a/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/MasterTestUtils.java @@ -50,7 +50,36 @@ public static CoreMasterContext testMasterContext(JournalSystem journalSystem) { public static CoreMasterContext testMasterContext(JournalSystem journalSystem, UserState userState) { return testMasterContext(journalSystem, userState, - HeapBlockMetaStore::new, x -> new HeapInodeStore()); + HeapBlockMetaStore::new, x -> new HeapInodeStore(), new AlwaysStandbyPrimarySelector()); + } + + /** + * @return a basic master context for the purpose of testing + * @param journalSystem a journal system to use in the context + * @param userState the user state to use in the context + * @param primarySelector the primary selector + */ + public static CoreMasterContext testMasterContext(JournalSystem journalSystem, + UserState userState, PrimarySelector primarySelector) { + return testMasterContext(journalSystem, userState, + HeapBlockMetaStore::new, x -> new HeapInodeStore(), primarySelector); + } + + /** + * @return a basic master context for the purpose of testing + * @param journalSystem a journal system to use in the context + * @param userState the user state to use in the context + * @param blockStoreFactory a factory to create {@link BlockMetaStore} + * @param inodeStoreFactory a factory to create {@link InodeStore} + */ + public static CoreMasterContext testMasterContext( + 
JournalSystem journalSystem, UserState userState, + BlockMetaStore.Factory blockStoreFactory, + InodeStore.Factory inodeStoreFactory + ) { + return testMasterContext( + journalSystem, userState, blockStoreFactory, + inodeStoreFactory, new AlwaysPrimaryPrimarySelector()); } /** @@ -59,14 +88,17 @@ public static CoreMasterContext testMasterContext(JournalSystem journalSystem, * @param userState the user state to use in the context * @param blockStoreFactory a factory to create {@link BlockMetaStore} * @param inodeStoreFactory a factory to create {@link InodeStore} + * @param primarySelector the primary selector */ public static CoreMasterContext testMasterContext( JournalSystem journalSystem, UserState userState, BlockMetaStore.Factory blockStoreFactory, - InodeStore.Factory inodeStoreFactory) { + InodeStore.Factory inodeStoreFactory, + PrimarySelector primarySelector + ) { return CoreMasterContext.newBuilder() .setJournalSystem(journalSystem) - .setPrimarySelector(new AlwaysStandbyPrimarySelector()) + .setPrimarySelector(primarySelector) .setUserState(userState) .setSafeModeManager(new TestSafeModeManager()) .setBackupManager(mock(BackupManager.class)) diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java index 900c179a48c4..7f10171bcb7a 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java @@ -29,6 +29,7 @@ import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatScheduler; import alluxio.heartbeat.ManuallyScheduleHeartbeat; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; @@ -113,7 +114,9 @@ public void before() throws Exception { mRegistry = new MasterRegistry(); mMetrics = Lists.newArrayList(); 
JournalSystem journalSystem = new NoopJournalSystem(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext( + new NoopJournalSystem(), null, new AlwaysPrimaryPrimarySelector() + ); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mClock = new ManualClock(); mExecutorService = diff --git a/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java index db88d22011ec..837546568249 100644 --- a/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/ConcurrentBlockMasterTest.java @@ -27,9 +27,11 @@ import alluxio.grpc.StorageList; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.ManuallyScheduleHeartbeat; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.proto.meta.Block; @@ -109,7 +111,9 @@ public class ConcurrentBlockMasterTest { @Before public void before() throws Exception { mRegistry = new MasterRegistry(); - mMasterContext = MasterTestUtils.testMasterContext(); + mMasterContext = MasterTestUtils.testMasterContext( + new NoopJournalSystem(), null, new AlwaysPrimaryPrimarySelector() + ); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, mMasterContext); mClock = new ManualClock(); mExecutorService = diff --git a/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java new file mode 100644 index 
000000000000..64acbb337e08 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/AllMasterRegistrationBlockWorker.java @@ -0,0 +1,78 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.Sessions; +import alluxio.wire.WorkerNetAddress; +import alluxio.worker.file.FileSystemMasterClient; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * The class is responsible for managing all top level components of BlockWorker. + * + * This block worker implementation register workers to all masters. + */ +@NotThreadSafe +public class AllMasterRegistrationBlockWorker extends DefaultBlockWorker { + private static final Logger LOG = LoggerFactory.getLogger(AllMasterRegistrationBlockWorker.class); + private BlockSyncMasterGroup mBlockSyncMasterGroup; + + /** + * Constructs a block worker when workers register to all masters. 
+ * + * @param blockMasterClientPool a client pool for talking to the block master + * @param fileSystemMasterClient a client for talking to the file system master + * @param sessions an object for tracking and cleaning up client sessions + * @param blockStore an Alluxio block store + * @param workerId worker id + */ + public AllMasterRegistrationBlockWorker( + BlockMasterClientPool blockMasterClientPool, + FileSystemMasterClient fileSystemMasterClient, Sessions sessions, + BlockStore blockStore, AtomicReference workerId) { + super(blockMasterClientPool, fileSystemMasterClient, sessions, blockStore, workerId); + } + + @Override + protected void setupBlockMasterSync() { + mBlockSyncMasterGroup = + BlockSyncMasterGroup.Factory.createAllMasterSync(this); + mResourceCloser.register(mBlockSyncMasterGroup); + mBlockSyncMasterGroup.start(getExecutorService()); + } + + @Override + public void start(WorkerNetAddress address) throws IOException { + super.start(address); + + InetSocketAddress primaryMasterAddress = + (InetSocketAddress) mFileSystemMasterClient.getRemoteSockAddress(); + // Registrations on standby masters are not required to complete for starting a worker + // because standby masters do not serve read requests. + // Standby masters will catch up following block location changes via worker heartbeats. 
+ mBlockSyncMasterGroup.waitForPrimaryMasterRegistrationComplete(primaryMasterAddress); + } + + /** + * @return the block sync master group + */ + public BlockSyncMasterGroup getBlockSyncMasterGroup() { + return mBlockSyncMasterGroup; + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java index f26b7b74bbdb..fd0a1e24ece2 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockHeartbeatReporter.java @@ -11,16 +11,21 @@ package alluxio.worker.block; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + import com.google.common.collect.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import javax.annotation.concurrent.ThreadSafe; /** @@ -29,7 +34,7 @@ * through {@link alluxio.worker.block.BlockWorker#commitBlock(long, long, boolean)}. */ @ThreadSafe -public final class BlockHeartbeatReporter extends AbstractBlockStoreEventListener { +public class BlockHeartbeatReporter extends AbstractBlockStoreEventListener { private static final Logger LOG = LoggerFactory.getLogger(BlockHeartbeatReporter.class); /** Lock for operations on the removed and added block collections. */ @@ -50,6 +55,9 @@ public final class BlockHeartbeatReporter extends AbstractBlockStoreEventListene */ private final Map> mLostStorage; + private final boolean mWorkerRegisterToAllMasters = + Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); + /** * Creates a new instance of {@link BlockHeartbeatReporter}. 
*/ @@ -62,12 +70,11 @@ public BlockHeartbeatReporter() { } /** - * Generates the report of the block store delta in the last heartbeat period. Calling this method - * marks the end of a period and the start of a new heartbeat period. + * Generates the report of the report and clear the states. * * @return the block store delta report for the last heartbeat period */ - public BlockHeartbeatReport generateReport() { + public BlockHeartbeatReport generateReportAndClear() { synchronized (mLock) { BlockHeartbeatReport report = new BlockHeartbeatReport(mAddedBlocks, mRemovedBlocks, mLostStorage); @@ -79,6 +86,73 @@ public BlockHeartbeatReport generateReport() { } } + /** + * Clears the internal states of the reporter. + */ + public void clear() { + synchronized (mLock) { + mAddedBlocks.clear(); + mRemovedBlocks.clear(); + mLostStorage.clear(); + } + } + + /** + * Merges back the cleared block lists/maps given a generated report. + * used when the worker heartbeat rpc fails. + * + * @param previousReport the previous generated report + */ + public void mergeBack(BlockHeartbeatReport previousReport) { + synchronized (mLock) { + Set removedBlocksSet = new HashSet<>(mRemovedBlocks); + for (Entry> addedBlockEntry: + previousReport.getAddedBlocks().entrySet()) { + List blockIds = addedBlockEntry.getValue(); + // Two pass scans to avoid creating too many ephemeral objects + // given that adding a block then removing it is unlikely. 
+ boolean needToRemoveBlock = false; + for (long blockId: blockIds) { + if (removedBlocksSet.contains(blockId)) { + needToRemoveBlock = true; + break; + } + } + final List blockIdsToAdd; + if (!needToRemoveBlock) { + blockIdsToAdd = blockIds; + } else { + blockIdsToAdd = new ArrayList<>(); + for (long blockId: blockIds) { + if (!removedBlocksSet.contains(blockId)) { + blockIdsToAdd.add(blockId); + } + } + } + if (blockIdsToAdd.size() == 0) { + continue; + } + if (mAddedBlocks.containsKey(addedBlockEntry.getKey())) { + mAddedBlocks.get(addedBlockEntry.getKey()).addAll(blockIdsToAdd); + } else { + mAddedBlocks.put(addedBlockEntry.getKey(), blockIdsToAdd); + } + } + for (Map.Entry> lostStorageEntry: + previousReport.getLostStorage().entrySet()) { + if (lostStorageEntry.getValue().size() == 0) { + continue; + } + if (mLostStorage.containsKey(lostStorageEntry.getKey())) { + mLostStorage.get(lostStorageEntry.getKey()).addAll(lostStorageEntry.getValue()); + } else { + mLostStorage.put(lostStorageEntry.getKey(), lostStorageEntry.getValue()); + } + } + mRemovedBlocks.addAll(previousReport.getRemovedBlocks()); + } + } + @Override public void onMoveBlockByClient(long blockId, BlockStoreLocation oldLocation, BlockStoreLocation newLocation) { diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java index ced8f6efe6db..177bd19ac8a4 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java @@ -33,11 +33,13 @@ import alluxio.grpc.GrpcUtils; import alluxio.grpc.LocationBlockIdListEntry; import alluxio.grpc.Metric; +import alluxio.grpc.NotifyWorkerIdPRequest; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.master.MasterClientContext; +import 
alluxio.master.selectionpolicy.MasterSelectionPolicy; import alluxio.retry.RetryPolicy; import alluxio.wire.WorkerNetAddress; @@ -46,6 +48,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -74,6 +77,17 @@ public BlockMasterClient(MasterClientContext conf) { super(conf); } + /** + * Creates a new instance of {@link BlockMasterClient} for the worker and + * connects to a specific master. + * + * @param conf master client configuration + * @param address the master address + */ + public BlockMasterClient(MasterClientContext conf, InetSocketAddress address) { + super(conf, MasterSelectionPolicy.Factory.specifiedMaster(address)); + } + @Override protected ServiceType getRemoteServiceType() { return ServiceType.BLOCK_MASTER_WORKER_SERVICE; @@ -359,4 +373,20 @@ public void registerWithStream(final long workerId, final List storageTi throw ioe.get(); } } + + /** + * Notify all masters about the worker ID. 
+ * @param workerId the worker id + * @param address the worker address + */ + public void notifyWorkerId(long workerId, WorkerNetAddress address) throws IOException { + retryRPC(() -> { + LOG.info("Notifying workerID to master {} with workerId {}, workerAddress {}", + mServerAddress, + workerId, + address); + return mClient.notifyWorkerId(NotifyWorkerIdPRequest.newBuilder() + .setWorkerId(workerId).setWorkerNetAddress(GrpcUtils.toProto(address)).build()); + }, LOG, "NotifyWorkerId", "workerId=%d, workerAddress=%s", workerId, address); + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java index e9ece84e2fc9..3c14b07be555 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClientPool.java @@ -17,11 +17,14 @@ import alluxio.master.MasterClientContext; import alluxio.resource.ResourcePool; +import com.google.common.annotations.VisibleForTesting; import com.google.common.io.Closer; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -34,14 +37,41 @@ public class BlockMasterClientPool extends ResourcePool { private final Queue mClientList; private final MasterClientContext mMasterContext; + /** If not specified, the client pool will create clients connecting to the primary master. **/ + @Nullable + private final InetSocketAddress mMasterAddress; + + /** + * A factory class for testing purpose. 
+ */ + @VisibleForTesting + static class Factory { + BlockMasterClientPool create() { + return new BlockMasterClientPool(); + } + + BlockMasterClientPool create(@Nullable InetSocketAddress address) { + return new BlockMasterClientPool(address); + } + } + /** * Creates a new block master client pool. */ public BlockMasterClientPool() { + this(null); + } + + /** + * Creates a new block master client pool. + * @param address the block master address + */ + public BlockMasterClientPool(@Nullable InetSocketAddress address) { super(Configuration.getInt(PropertyKey.WORKER_BLOCK_MASTER_CLIENT_POOL_SIZE)); mClientList = new ConcurrentLinkedQueue<>(); mMasterContext = MasterClientContext .newBuilder(ClientContext.create(Configuration.global())).build(); + mMasterAddress = address; } @Override @@ -56,7 +86,14 @@ public void close() throws IOException { @Override public BlockMasterClient createNewResource() { - BlockMasterClient client = new BlockMasterClient(mMasterContext); + final BlockMasterClient client; + if (mMasterAddress != null) { + // If an address is specified, that means all clients in this pool connect + // to the specific master no matter it is a primary or standby + client = new BlockMasterClient(mMasterContext, mMasterAddress); + } else { + client = new BlockMasterClient(mMasterContext); + } mClientList.add(client); return client; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java new file mode 100644 index 000000000000..903cd8fb2215 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java @@ -0,0 +1,181 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.ClientContext; +import alluxio.ProcessUtils; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.heartbeat.HeartbeatContext; +import alluxio.heartbeat.HeartbeatThread; +import alluxio.master.MasterClientContext; +import alluxio.security.user.ServerUserState; +import alluxio.util.CommonUtils; +import alluxio.util.ConfigurationUtils; +import alluxio.util.WaitForOptions; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeoutException; + +/** + * An abstraction layer that manages the worker heartbeats with multiple block masters. + * This is only active when worker.register.to.all.masters=true. 
+ */ +public class BlockSyncMasterGroup implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(SpecificMasterBlockSync.class); + private volatile boolean mStarted = false; + + private final boolean mTestMode = Configuration.getBoolean(PropertyKey.TEST_MODE); + + private static BlockMasterClientFactory sBlockMasterClientFactory + = new BlockMasterClientFactory(); + + private static final long WORKER_MASTER_CONNECT_RETRY_TIMEOUT = + Configuration.getMs(PropertyKey.WORKER_MASTER_CONNECT_RETRY_TIMEOUT); + + /** + * Creates a block sync master group. + * @param masterAddresses the master addresses to sync + * @param blockWorker the block worker instance + */ + public BlockSyncMasterGroup( + List masterAddresses, + BlockWorker blockWorker + ) throws IOException { + // TODO(elega): handle master membership changes + // https://github.com/Alluxio/alluxio/issues/16898 + for (InetSocketAddress masterAddr : masterAddresses) { + BlockMasterClient masterClient = sBlockMasterClientFactory.create(masterAddr); + BlockHeartbeatReporter heartbeatReporter = new BlockHeartbeatReporter(); + + blockWorker.getBlockStore().registerBlockStoreEventListener(heartbeatReporter); + // Setup BlockMasterSync + SpecificMasterBlockSync blockMasterSync = mTestMode + ? new TestSpecificMasterBlockSync( + blockWorker, masterClient, heartbeatReporter) + : new SpecificMasterBlockSync( + blockWorker, masterClient, heartbeatReporter); + // Register each BlockMasterSync to the block events on this worker + mMasterSyncOperators.put(masterAddr, blockMasterSync); + LOG.info("Kick off BlockMasterSync with master {}", masterAddr); + } + } + + /** + * Starts the heartbeats. 
+ * @param executorService the executor service to run the heartbeats + */ + public synchronized void start(ExecutorService executorService) { + if (!mStarted) { + mStarted = true; + } + mMasterSyncOperators.values().forEach(blockMasterSync -> executorService + .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, + () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + Configuration.global(), ServerUserState.global()))); + } + + private final Map mMasterSyncOperators = + new HashMap<>(); + + @Override + public void close() throws IOException { + mMasterSyncOperators.values().forEach( + SpecificMasterBlockSync::close + ); + } + + static void setBlockMasterClientFactory(BlockMasterClientFactory factory) { + sBlockMasterClientFactory = factory; + } + + /** + * Waits until the primary master registration completes. + * @param primaryMasterAddress the primary master address + */ + public void waitForPrimaryMasterRegistrationComplete(InetSocketAddress primaryMasterAddress) { + SpecificMasterBlockSync primaryMasterSync = + mMasterSyncOperators.get(primaryMasterAddress); + Preconditions.checkNotNull( + primaryMasterSync, "Primary master block sync should not be null"); + try { + CommonUtils.waitFor(this + " to start", + primaryMasterSync::isRegistered, + WaitForOptions.defaults().setTimeoutMs((int) WORKER_MASTER_CONNECT_RETRY_TIMEOUT)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("Exit the worker on interruption", e); + throw new RuntimeException(e); + } catch (TimeoutException e) { + ProcessUtils.fatalError(LOG, e, "Failed to register with primary master"); + } + LOG.info("The worker has registered with primary master, address {}", primaryMasterAddress); + } + + /** + * @return if the worker is registered to all masters + */ + public boolean isRegisteredToAllMasters() { + return mMasterSyncOperators.values().stream().allMatch(SpecificMasterBlockSync::isRegistered); + } + + /** + * 
@return the master sync operators + */ + public Map getMasterSyncOperators() { + return mMasterSyncOperators; + } + + /** + * The factory class. + */ + public static class Factory { + /** + * Creates a block sync master group that heartbeats to all masters. + * @param blockWorker the block worker instance + * @return the block sync master group instance + */ + public static BlockSyncMasterGroup createAllMasterSync(BlockWorker blockWorker) { + List masterAddresses = + ConfigurationUtils.getMasterRpcAddresses(Configuration.global()); + try { + return new BlockSyncMasterGroup(masterAddresses, blockWorker); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** + * A factory class for testing purpose. + */ + @VisibleForTesting + static class BlockMasterClientFactory { + BlockMasterClient create(InetSocketAddress address) { + MasterClientContext context = MasterClientContext + .newBuilder(ClientContext.create(Configuration.global())).build(); + + return new BlockMasterClient(context, address); + } + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java index 19a0e996f07a..a2f96428ea92 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockWorkerFactory.java @@ -34,6 +34,8 @@ @ThreadSafe public final class BlockWorkerFactory implements WorkerFactory { private static final Logger LOG = LoggerFactory.getLogger(BlockWorkerFactory.class); + private final boolean mWorkerRegisterToAllMasters = Configuration.getBoolean( + PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); /** * Constructs a new {@link BlockWorkerFactory}. 
@@ -64,10 +66,17 @@ public BlockWorker create(WorkerRegistry registry, UfsManager ufsManager) { default: throw new UnsupportedOperationException("Unsupported block store type."); } - BlockWorker blockWorker = new DefaultBlockWorker(blockMasterClientPool, - new FileSystemMasterClient( - MasterClientContext.newBuilder(ClientContext.create(Configuration.global())).build()), - new Sessions(), blockStore, workerId); + BlockWorker blockWorker = mWorkerRegisterToAllMasters + ? new AllMasterRegistrationBlockWorker(blockMasterClientPool, + new FileSystemMasterClient( + MasterClientContext.newBuilder(ClientContext.create(Configuration.global())) + .build()), + new Sessions(), blockStore, workerId) + : new DefaultBlockWorker(blockMasterClientPool, + new FileSystemMasterClient( + MasterClientContext.newBuilder(ClientContext.create(Configuration.global())) + .build()), + new Sessions(), blockStore, workerId); registry.add(BlockWorker.class, blockWorker); return blockWorker; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 033eb28a42e6..0dd50a6978c5 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -138,7 +138,11 @@ public class DefaultBlockWorker extends AbstractWorker implements BlockWorker { public DefaultBlockWorker(BlockMasterClientPool blockMasterClientPool, FileSystemMasterClient fileSystemMasterClient, Sessions sessions, BlockStore blockStore, AtomicReference workerId) { - super(ExecutorServiceFactories.fixedThreadPool("block-worker-executor", 5)); + super( + Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS) + ? 
ExecutorServiceFactories.cachedThreadPool("block-worker-executor") + : ExecutorServiceFactories.fixedThreadPool("block-worker-executor", 5) + ); mBlockMasterClientPool = mResourceCloser.register(blockMasterClientPool); mFileSystemMasterClient = mResourceCloser.register(fileSystemMasterClient); mHeartbeatReporter = new BlockHeartbeatReporter(); @@ -169,6 +173,11 @@ public BlockStore getBlockStore() { return mBlockStore; } + @Override + public WorkerNetAddress getWorkerAddress() { + return mAddress; + } + @Override public Set> getDependencies() { return new HashSet<>(); @@ -330,7 +339,7 @@ public BlockWriter createBlockWriter(long sessionId, long blockId) @Override public BlockHeartbeatReport getReport() { - return mHeartbeatReporter.generateReport(); + return mHeartbeatReporter.generateReportAndClear(); } @Override diff --git a/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java new file mode 100644 index 000000000000..3c9aeea0b491 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java @@ -0,0 +1,292 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.worker.block; + +import alluxio.ProcessUtils; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ConnectionFailedException; +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.grpc.Command; +import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.retry.ExponentialBackoffRetry; +import alluxio.retry.RetryPolicy; +import alluxio.util.CommonUtils; +import alluxio.wire.WorkerNetAddress; + +import com.codahale.metrics.Counter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.net.SocketAddress; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * The block master sync thread when workers are registered to all masters. + * With respect to behaviors, this implementation differs from {@link BlockMasterSync} in: + * 1. The registration takes place asynchronously and the caller can poll the registration state. + * We need to make the process async because when standby master read is enabled, workers have to + * register to all masters and these registrations can happen concurrently to speed up the process. + * 2. A registration failure doesn't throw a fatal exception. Instead, it retries endlessly. + * This is because a standby master registration failure + * should be a soft failure and can be retried later. 
+ */ +@NotThreadSafe +public class SpecificMasterBlockSync implements HeartbeatExecutor, Closeable { + private static final Logger LOG = LoggerFactory.getLogger(SpecificMasterBlockSync.class); + private static final long ACQUIRE_LEASE_WAIT_MAX_DURATION = + Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_MAX_DURATION); + + private final long mWorkerBlockHeartbeatReportSizeThreshold = + Configuration.getInt(PropertyKey.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD); + + private final SocketAddress mMasterAddress; + + /** + * The worker registration state. + * If the state is NOT_REGISTERED, heartbeat will trigger a registration. + * During the registration process, the state will be set to REGISTERING. + * When the registration is done, the state will be set to REGISTERED. + * When the sync receives a registration command from the master during the heartbeat, + * the state will be reset to NOT_REGISTERED and the sync will attempt to register it again + * in the next heartbeat. + */ + private volatile WorkerMasterRegistrationState mWorkerState = + WorkerMasterRegistrationState.NOT_REGISTERED; + + /** + * An async service to remove block. + */ + private final AsyncBlockRemover mAsyncBlockRemover; + + /** + * The worker ID for the worker. This may change if the master asks the worker to re-register. + */ + private final AtomicReference mWorkerId; + + /** + * Client for all master communication. + */ + private final BlockMasterClient mMasterClient; + + /** + * The net address of the worker. + */ + private final WorkerNetAddress mWorkerAddress; + + /** + * The helper instance for sync related methods. + */ + private final BlockMasterSyncHelper mBlockMasterSyncHelper; + + /** + * The block worker responsible for interacting with Alluxio and UFS storage. + */ + private final BlockWorker mBlockWorker; + /** + * Last System.currentTimeMillis() timestamp when a heartbeat successfully completed. 
+ */ + private long mLastSuccessfulHeartbeatMs = 0; + + private final BlockHeartbeatReporter mBlockHeartbeatReporter; + + /** + * Creates a new instance of {@link SpecificMasterBlockSync}. + * + * @param blockWorker the {@link BlockWorker} this syncer is updating to + * @param masterClient the block master client + * @param heartbeatReporter the heartbeat reporter + */ + public SpecificMasterBlockSync( + BlockWorker blockWorker, + BlockMasterClient masterClient, BlockHeartbeatReporter heartbeatReporter) + throws IOException { + mBlockWorker = blockWorker; + mWorkerId = blockWorker.getWorkerId(); + mWorkerAddress = blockWorker.getWorkerAddress(); + mMasterClient = masterClient; + mAsyncBlockRemover = new AsyncBlockRemover(mBlockWorker); + mBlockMasterSyncHelper = new BlockMasterSyncHelper(mMasterClient); + mMasterAddress = masterClient.getRemoteSockAddress(); + mBlockHeartbeatReporter = heartbeatReporter; + } + + private void registerWithMaster() { + RetryPolicy retry = createEndlessRetry(); + while (retry.attempt()) { + try { + LOG.info("Registering with master {}", mMasterAddress); + // The content in the report can be cleared because registration will + // report these block information anyways. 
+ mBlockHeartbeatReporter.clear(); + registerWithMasterInternal(); + LOG.info("Finished registration with {}", mMasterAddress); + return; + } catch (Exception e) { + LOG.error("Failed to register with master {}, error {}, retry count {} Will retry...", + mMasterAddress, e, retry.getAttemptCount()); + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + } + } + // Should not reach here because the retry is indefinite + ProcessUtils.fatalError(LOG, new RuntimeException(), + "Failed to register with master %s", mMasterAddress); + } + + protected void registerWithMasterInternal() + throws IOException, FailedToAcquireRegisterLeaseException { + // The target master is not necessarily the one that allocated the workerID + LOG.info("Notify the master {} about the workerID {}", mMasterAddress, mWorkerId); + mMasterClient.notifyWorkerId(mWorkerId.get(), mWorkerAddress); + // TODO(elega) If worker registration to all masters happens at the same time, + // this might cause worker OOM issues because each block sync thread will hold a BlockStoreMeta + // instance during the registration. + // If this happens, consider limiting the worker registration concurrency, + // e.g. register the worker to masters one by one. 
+ BlockStoreMeta storeMeta = mBlockWorker.getStoreMetaFull(); + + try { + mBlockMasterSyncHelper.tryAcquireLease(mWorkerId.get(), storeMeta); + } catch (FailedToAcquireRegisterLeaseException e) { + if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { + throw new RuntimeException(String.format("Master register lease timeout exceeded: %dms", + ACQUIRE_LEASE_WAIT_MAX_DURATION)); + } + throw e; + } + mWorkerState = WorkerMasterRegistrationState.REGISTERING; + mBlockMasterSyncHelper.registerToMaster(mWorkerId.get(), storeMeta); + + mWorkerState = WorkerMasterRegistrationState.REGISTERED; + Metrics.WORKER_MASTER_REGISTRATION_SUCCESS_COUNT.inc(); + mLastSuccessfulHeartbeatMs = CommonUtils.getCurrentMs(); + } + + private RetryPolicy createEndlessRetry() { + return new ExponentialBackoffRetry( + 1000, 60 * 1000, Integer.MAX_VALUE); + } + + @Override + public synchronized void heartbeat() throws InterruptedException { + if (mWorkerState == WorkerMasterRegistrationState.NOT_REGISTERED) { + // Not registered because: + // 1. The worker just started, we kick off the 1st registration here. + // 2. Master sends a registration command during + // the heartbeat and resets the registration state. (e.g. master restarted) + // 3. 
The heartbeat message becomes too big that we decide to fall back to a full re-register + LOG.info("The worker needs to register with master {}", mMasterAddress); + // This will retry indefinitely and essentially block here if the master is not ready + registerWithMaster(); + LOG.info("BlockMasterSync to master {} has started", mMasterAddress); + } + if (mWorkerState == WorkerMasterRegistrationState.REGISTERING) { + return; + } + + RetryPolicy endlessRetry = createEndlessRetry(); + while (endlessRetry.attempt()) { + BlockHeartbeatReport report = mBlockHeartbeatReporter.generateReportAndClear(); + boolean success = false; + try { + beforeHeartbeat(); + success = mBlockMasterSyncHelper.heartbeat( + mWorkerId.get(), report, + mBlockWorker.getStoreMeta(), this::handleMasterCommand); + } catch (Exception e) { + LOG.error("Failed to receive master heartbeat command. worker id {}", mWorkerId, e); + } + if (success) { + mLastSuccessfulHeartbeatMs = CommonUtils.getCurrentMs(); + break; + } else { + LOG.warn( + "Heartbeat failed, worker id {}, worker host {} # of attempts {}, last success ts {}", + mWorkerId.get(), mWorkerAddress.getHost(), endlessRetry.getAttemptCount(), + mLastSuccessfulHeartbeatMs); + if (report.getBlockChangeCount() >= mWorkerBlockHeartbeatReportSizeThreshold) { + // If the report becomes too big, merging it back to the reporter might cause OOM issue. + // We throw away the result and let the worker re-register with the master. + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + return; + } else { + mBlockHeartbeatReporter.mergeBack(report); + } + } + } + } + + protected void beforeHeartbeat() { + } + + @Override + public void close() { + mAsyncBlockRemover.shutDown(); + mMasterClient.close(); + } + + /** + * @return if the worker has registered with the master successfully + */ + public boolean isRegistered() { + return mWorkerState == WorkerMasterRegistrationState.REGISTERED; + } + + /** + * Handles a master command. 
The command is one of Unknown, Nothing, Register, Free, or Delete. + * This call will block until the command is complete. + * + * @param cmd the command to execute + * @throws IOException if I/O errors occur + * @throws ConnectionFailedException if connection fails + */ + private void handleMasterCommand(Command cmd) throws IOException, ConnectionFailedException { + if (cmd == null) { + return; + } + switch (cmd.getCommandType()) { + // Currently unused + case Delete: + break; + // Master requests blocks to be removed from Alluxio managed space. + case Free: + mAsyncBlockRemover.addBlocksToDelete(cmd.getDataList()); + break; + // No action required + case Nothing: + break; + // Master requests re-registration + case Register: + mWorkerState = WorkerMasterRegistrationState.NOT_REGISTERED; + break; + // Unknown request + case Unknown: + LOG.error("Master heartbeat sends unknown command {}", cmd); + break; + default: + throw new RuntimeException("Un-recognized command from master " + cmd); + } + } + + /** + * Metrics. + */ + public static final class Metrics { + private static final Counter WORKER_MASTER_REGISTRATION_SUCCESS_COUNT + = MetricsSystem.counter(MetricKey.WORKER_MASTER_REGISTRATION_SUCCESS_COUNT.getName()); + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java b/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java new file mode 100644 index 000000000000..d6f3761f9b10 --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/TestSpecificMasterBlockSync.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.exception.runtime.UnavailableRuntimeException; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * A test {@link SpecificMasterBlockSync} that adds some interfaces for testing. + */ +@NotThreadSafe +@VisibleForTesting +public final class TestSpecificMasterBlockSync extends SpecificMasterBlockSync { + private static final Logger LOG = LoggerFactory.getLogger(TestSpecificMasterBlockSync.class); + private volatile boolean mFailHeartbeat = false; + private final AtomicInteger mRegistrationSuccessCount = new AtomicInteger(0); + + /** + * Creates a new instance of {@link SpecificMasterBlockSync}. + * + * @param blockWorker the {@link BlockWorker} this syncer is updating to + * @param masterClient the block master client + * @param heartbeatReporter the heartbeat reporter + */ + public TestSpecificMasterBlockSync( + BlockWorker blockWorker, BlockMasterClient masterClient, + BlockHeartbeatReporter heartbeatReporter) throws IOException { + super(blockWorker, masterClient, heartbeatReporter); + } + + /** + * Restores the heartbeat. + */ + public void restoreHeartbeat() { + mFailHeartbeat = false; + } + + /** + * Fails the heartbeat and lets it throws an exception. 
+ */ + public void failHeartbeat() { + mFailHeartbeat = true; + } + + /** + * @return registration success count + */ + public int getRegistrationSuccessCount() { + return mRegistrationSuccessCount.get(); + } + + @Override + protected void registerWithMasterInternal() + throws IOException, FailedToAcquireRegisterLeaseException { + super.registerWithMasterInternal(); + mRegistrationSuccessCount.incrementAndGet(); + } + + @Override + protected void beforeHeartbeat() { + if (mFailHeartbeat) { + throw new UnavailableRuntimeException("Heartbeat paused"); + } + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java b/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java new file mode 100644 index 000000000000..20066eb3d90b --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/WorkerMasterRegistrationState.java @@ -0,0 +1,21 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +/** + * The enum class for worker master registration state. 
+ */ +public enum WorkerMasterRegistrationState { + REGISTERED, + NOT_REGISTERED, + REGISTERING, +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java b/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java new file mode 100644 index 000000000000..44e118bdbc23 --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/AllMasterRegistrationBlockWorkerTest.java @@ -0,0 +1,75 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import alluxio.Sessions; +import alluxio.conf.PropertyKey; +import alluxio.master.journal.JournalType; + +import org.junit.Test; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Unit tests for {@link DefaultBlockWorker}. 
+ */ +public class AllMasterRegistrationBlockWorkerTest extends DefaultBlockWorkerTestBase { + @Override + public void before() throws Exception { + mConfigurationRule.set(PropertyKey.WORKER_MASTER_CONNECT_RETRY_TIMEOUT, "5s"); + mConfigurationRule.set(PropertyKey.TEST_MODE, true); + mConfigurationRule.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); + mConfigurationRule.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED); + mConfigurationRule.set(PropertyKey.MASTER_RPC_ADDRESSES, + "localhost:19998,localhost:19988,localhost:19978"); + super.before(); + + when(mFileSystemMasterClient.getRemoteSockAddress()) + .thenReturn(InetSocketAddress.createUnresolved("localhost", 19998)); + + mBlockWorker = new AllMasterRegistrationBlockWorker( + mBlockMasterClientPool, mFileSystemMasterClient, + mock(Sessions.class), mBlockStore, new AtomicReference<>(INVALID_WORKER_ID)); + BlockSyncMasterGroup.setBlockMasterClientFactory( + new BlockSyncMasterGroup.BlockMasterClientFactory() { + @Override + BlockMasterClient create(InetSocketAddress address) { + return mBlockMasterClient; + } + }); + } + + @Test + public void workerMasterRegistrationFailed() throws IOException { + doThrow(new RuntimeException("error")).when(mBlockMasterClient).registerWithStream( + anyLong(), any(), any(), any(), any(), any(), any()); + Exception e = assertThrows(Exception.class, () -> mBlockWorker.start(WORKER_ADDRESS)); + assertTrue(e.getMessage().contains("Fatal error: Failed to register with primary master")); + } + + @Test + public void workerMasterRegistration() throws IOException { + mBlockWorker.start(WORKER_ADDRESS); + } + + // TODO(elega) add a test to confirm the worker can start when the registration to a standby fails +} diff --git a/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java b/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java index a04a7e4042a3..ebdcaa799a34 100644 --- 
a/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/BlockHeartbeatReporterTest.java @@ -16,10 +16,16 @@ import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import com.google.common.collect.ImmutableMap; import org.junit.Before; import org.junit.Test; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; @@ -40,6 +46,7 @@ public final class BlockHeartbeatReporterTest { */ @Before public final void before() { + Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); mReporter = new BlockHeartbeatReporter(); } @@ -54,18 +61,18 @@ private void removeBlock(long blockId) { } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method for an empty report. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} method for an empty report. */ @Test public void generateReportEmpty() { - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertTrue(report.getAddedBlocks().isEmpty()); assertTrue(report.getRemovedBlocks().isEmpty()); } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after moving block. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after moving block. 
*/ @Test public void generateReportMove() { @@ -75,7 +82,7 @@ public void generateReportMove() { moveBlock(block1, MEM_LOC); moveBlock(block2, SSD_LOC); moveBlock(block3, HDD_LOC); - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); Map> addedBlocks = report.getAddedBlocks(); // Block1 moved to memory @@ -95,8 +102,8 @@ public void generateReportMove() { } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method that generating a report - * clears the state of the reporter. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method that generating a report clears the state of the reporter. */ @Test public void generateReportStateClear() { @@ -104,18 +111,18 @@ public void generateReportStateClear() { moveBlock(block1, MEM_LOC); // First report should have updates - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertFalse(report.getAddedBlocks().isEmpty()); // Second report should not have updates - BlockHeartbeatReport nextReport = mReporter.generateReport(); + BlockHeartbeatReport nextReport = mReporter.generateReportAndClear(); assertTrue(nextReport.getAddedBlocks().isEmpty()); assertTrue(nextReport.getRemovedBlocks().isEmpty()); } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after removing blocks. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after removing blocks. 
*/ @Test public void generateReportRemove() { @@ -125,7 +132,7 @@ public void generateReportRemove() { removeBlock(block1); removeBlock(block2); removeBlock(block3); - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); // All blocks should be removed List removedBlocks = report.getRemovedBlocks(); @@ -140,8 +147,8 @@ public void generateReportRemove() { } /** - * Tests the {@link BlockHeartbeatReporter#generateReport()} method to correctly generate a report - * after moving a block and the removing it. + * Tests the {@link BlockHeartbeatReporter#generateReportAndClear()} + * method to correctly generate a report after moving a block and the removing it. */ @Test public void generateReportMoveThenRemove() { @@ -150,7 +157,7 @@ public void generateReportMoveThenRemove() { removeBlock(block1); // The block should not be in the added blocks list - BlockHeartbeatReport report = mReporter.generateReport(); + BlockHeartbeatReport report = mReporter.generateReportAndClear(); assertEquals(null, report.getAddedBlocks().get(MEM_LOC)); // The block should be in the removed blocks list @@ -158,4 +165,49 @@ public void generateReportMoveThenRemove() { assertEquals(1, removedBlocks.size()); assertTrue(removedBlocks.contains(block1)); } + + @Test + public void generateAndRevert() { + mReporter.onMoveBlockByWorker(1, MEM_LOC, SSD_LOC); + mReporter.onMoveBlockByWorker(2, MEM_LOC, SSD_LOC); + mReporter.onMoveBlockByWorker(3, SSD_LOC, HDD_LOC); + mReporter.onRemoveBlockByClient(4); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/foo"); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/bar"); + BlockHeartbeatReport originalReport = mReporter.generateReportAndClear(); + mReporter.mergeBack(originalReport); + BlockHeartbeatReport newReport = mReporter.generateReportAndClear(); + assertEquals(originalReport.getAddedBlocks(), newReport.getAddedBlocks()); + assertEquals(originalReport.getRemovedBlocks(), 
newReport.getRemovedBlocks()); + assertEquals(originalReport.getLostStorage(), newReport.getLostStorage()); + } + + @Test + public void generateUpdateThenRevert() { + mReporter.onMoveBlockByWorker(1, HDD_LOC, MEM_LOC); + mReporter.onMoveBlockByWorker(2, HDD_LOC, MEM_LOC); + mReporter.onMoveBlockByWorker(3, HDD_LOC, SSD_LOC); + mReporter.onRemoveBlockByClient(4); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/foo"); + mReporter.onStorageLost(Constants.MEDIUM_HDD, "/bar"); + BlockHeartbeatReport originalReport = mReporter.generateReportAndClear(); + + mReporter.onRemoveBlockByClient(1); + mReporter.onRemoveBlockByClient(3); + mReporter.onRemoveBlockByClient(5); + mReporter.onMoveBlockByWorker(6, SSD_LOC, HDD_LOC); + mReporter.onMoveBlockByWorker(7, HDD_LOC, MEM_LOC); + mReporter.onStorageLost(Constants.MEDIUM_MEM, "/baz"); + mReporter.mergeBack(originalReport); + BlockHeartbeatReport newReport = mReporter.generateReportAndClear(); + + assertEquals(ImmutableMap.of( + MEM_LOC, Arrays.asList(7L, 2L), + HDD_LOC, Collections.singletonList(6L) + ), newReport.getAddedBlocks()); + assertEquals(new HashSet<>(Arrays.asList(1L, 3L, 4L, 5L)), + new HashSet<>(newReport.getRemovedBlocks())); + assertEquals(2, newReport.getLostStorage().get(Constants.MEDIUM_MEM).size()); + assertEquals(1, newReport.getLostStorage().get(Constants.MEDIUM_HDD).size()); + } } diff --git a/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java b/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java index 6381349827d5..9abef428ec75 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/NoopBlockWorker.java @@ -162,6 +162,11 @@ public BlockStore getBlockStore() { throw new UnsupportedOperationException(); } + @Override + public WorkerNetAddress getWorkerAddress() { + throw new UnsupportedOperationException(); + } + @Override public Set> getDependencies() { return null; 
diff --git a/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java new file mode 100644 index 000000000000..cf02f215f52a --- /dev/null +++ b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java @@ -0,0 +1,248 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.ClientContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.FailedToAcquireRegisterLeaseException; +import alluxio.grpc.Command; +import alluxio.grpc.CommandType; +import alluxio.grpc.ConfigProperty; +import alluxio.grpc.Metric; +import alluxio.master.MasterClientContext; +import alluxio.master.SingleMasterInquireClient; +import alluxio.retry.RetryPolicy; +import alluxio.wire.WorkerNetAddress; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +public class SpecificMasterBlockSyncTest { + @Rule + public TemporaryFolder 
mTestFolder = new TemporaryFolder(); + + @Test + public void heartbeatThread() throws Exception { + int heartbeatReportCapacityThreshold = 3; + Configuration.set(PropertyKey.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD, + heartbeatReportCapacityThreshold); + BlockHeartbeatReporter blockHeartbeatReporter = new TestBlockHeartbeatReporter(); + + // Flaky registration succeeds every other time. + TestBlockMasterClient.INSTANCE.setFlakyRegistration(true); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + + SpecificMasterBlockSync sync = new SpecificMasterBlockSync( + getMockedBlockWorker(), TestBlockMasterClient.INSTANCE, blockHeartbeatReporter + ); + assertFalse(sync.isRegistered()); + + // heartbeat registers the worker if it has not been registered. + sync.heartbeat(); + assertTrue(sync.isRegistered()); + + // heartbeat returning register command resets the worker state. + Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, true); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); + sync.heartbeat(); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + assertFalse(sync.isRegistered()); + + Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, false); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); + sync.heartbeat(); + TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); + assertFalse(sync.isRegistered()); + + // heartbeat registers the worker if it has not been registered. + sync.heartbeat(); + assertTrue(sync.isRegistered()); + + // TestBlockHeartbeatReporter generates the report with one more removed block id each time. + // The heartbeat should retry 3 times before it succeeds because + // heartbeatReportCapacityThreshold is 3. 
+ TestBlockMasterClient.INSTANCE.mHeartbeatCallCount = 0; + TestBlockMasterClient.INSTANCE.setHeartbeatError(true); + sync.heartbeat(); + assertFalse(sync.isRegistered()); + assertEquals( + heartbeatReportCapacityThreshold, TestBlockMasterClient.INSTANCE.mHeartbeatCallCount); + + // registration should happen on the next heartbeat and the reporter should be cleared, + // except the newly generated ones. + TestBlockMasterClient.INSTANCE.setHeartbeatError(false); + sync.heartbeat(); + assertTrue(sync.isRegistered()); + assertEquals(1, blockHeartbeatReporter.generateReportAndClear().getBlockChangeCount()); + + assertTrue(TestBlockMasterClient.INSTANCE.mRegisterCalled); + assertTrue(TestBlockMasterClient.INSTANCE.mRegisterWithStreamCalled); + } + + private static class TestBlockHeartbeatReporter extends BlockHeartbeatReporter { + AtomicInteger mId = new AtomicInteger(0); + + @Override + public BlockHeartbeatReport generateReportAndClear() { + // On generation, add one block each time. + onRemoveBlockByWorker(mId.incrementAndGet()); + return super.generateReportAndClear(); + } + } + + private static class TestBlockMasterClient extends BlockMasterClient { + public static final TestBlockMasterClient INSTANCE = new TestBlockMasterClient(); + + private boolean mLastRegisterSuccess = true; + private boolean mFlakyRegistration = false; + private boolean mReturnRegisterCommand = false; + private boolean mHeartbeatFailed = false; + + private boolean mRegisterCalled = false; + + private boolean mRegisterWithStreamCalled = false; + private int mHeartbeatCallCount = 0; + + public void setFlakyRegistration(boolean value) { + mFlakyRegistration = value; + } + + public void setReturnRegisterCommand(boolean value) { + mReturnRegisterCommand = value; + } + + public void setHeartbeatError(boolean value) { + mHeartbeatFailed = value; + } + + public TestBlockMasterClient() { + super(MasterClientContext + .newBuilder(ClientContext.create(Configuration.global())) + 
.setMasterInquireClient(new SingleMasterInquireClient( + InetSocketAddress.createUnresolved("localhost", 0))).build()); + } + + @Override + public void register( + long workerId, List storageTierAliases, + Map totalBytesOnTiers, Map usedBytesOnTiers, + Map> currentBlocksOnLocation, + Map> lostStorage, List configList) + throws IOException { + if (!mFlakyRegistration) { + return; + } + if (mLastRegisterSuccess) { + mLastRegisterSuccess = false; + throw new IOException("Registration failed"); + } else { + mLastRegisterSuccess = true; + mRegisterCalled = true; + } + } + + @Override + public void registerWithStream( + long workerId, List storageTierAliases, + Map totalBytesOnTiers, + Map usedBytesOnTiers, + Map> currentBlocksOnLocation, + Map> lostStorage, + List configList) throws IOException { + if (!mFlakyRegistration) { + return; + } + if (mLastRegisterSuccess) { + mLastRegisterSuccess = false; + throw new IOException("Registration failed"); + } else { + mLastRegisterSuccess = true; + mRegisterWithStreamCalled = true; + } + } + + @Override + public synchronized Command heartbeat( + long workerId, Map capacityBytesOnTiers, + Map usedBytesOnTiers, + List removedBlocks, + Map> addedBlocks, + Map> lostStorage, + List metrics) throws IOException { + mHeartbeatCallCount++; + if (mHeartbeatFailed) { + throw new IOException("heartbeat failed"); + } + if (mReturnRegisterCommand) { + return Command.newBuilder().setCommandType(CommandType.Register).build(); + } + return Command.newBuilder().setCommandType(CommandType.Nothing).build(); + } + + @Override + public void acquireRegisterLeaseWithBackoff( + long workerId, int estimatedBlockCount, RetryPolicy retry) + throws IOException, FailedToAcquireRegisterLeaseException { + } + + @Override + public void notifyWorkerId(long workerId, WorkerNetAddress address) throws IOException { + } + } + + public BlockMasterClientPool mClientPool = new BlockMasterClientPool() { + @Override + public BlockMasterClient acquire() { + return 
TestBlockMasterClient.INSTANCE; + } + + @Override + public void release(BlockMasterClient resource) { + } + }; + + private BlockWorker getMockedBlockWorker() throws Exception { + File tempFolder = mTestFolder.newFolder(); + BlockMetadataManager metadataManager = + TieredBlockStoreTestUtils.defaultMetadataManager(tempFolder.getAbsolutePath()); + + BlockWorker blockWorker = Mockito.mock(BlockWorker.class); + Mockito.when(blockWorker.getStoreMetaFull()) + .thenReturn(metadataManager.getBlockStoreMetaFull()); + Mockito.when(blockWorker.getStoreMeta()) + .thenReturn(metadataManager.getBlockStoreMetaFull()); + Mockito.when(blockWorker.getReport()) + .thenReturn(new BlockHeartbeatReport(Collections.emptyMap(), + Collections.emptyList(), Collections.emptyMap())); + Mockito.when(blockWorker.getWorkerAddress()) + .thenReturn(new WorkerNetAddress()); + Mockito.when(blockWorker.getWorkerId()) + .thenReturn(new AtomicReference<>(0L)); + return blockWorker; + } +} diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index 1740e49a834b..22f649bcc96e 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -240,6 +240,16 @@ message CommitBlockInUfsPRequest { message CommitBlockInUfsPOptions {} message CommitBlockInUfsPResponse {} +message NotifyWorkerIdPOptions {} +message NotifyWorkerIdPRequest { + optional int64 workerId = 1; + /** the worker network address */ + optional grpc.WorkerNetAddress workerNetAddress = 2; + optional NotifyWorkerIdPOptions options = 3; +} +message NotifyWorkerIdPResponse { +} + message GetWorkerIdPOptions {} message GetWorkerIdPRequest { /** the worker network address */ @@ -318,6 +328,11 @@ service BlockMasterWorkerService { */ rpc GetWorkerId(GetWorkerIdPRequest) returns (GetWorkerIdPResponse); + /** + * Notify all masters about the worker ID. 
+ */ + rpc NotifyWorkerId(NotifyWorkerIdPRequest) returns (NotifyWorkerIdPResponse); + /** * Registers a worker. */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index ad504e11a0dc..36c4cd273b71 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -595,6 +595,32 @@ { "name": "CommitBlockInUfsPResponse" }, + { + "name": "NotifyWorkerIdPOptions" + }, + { + "name": "NotifyWorkerIdPRequest", + "fields": [ + { + "id": 1, + "name": "workerId", + "type": "int64" + }, + { + "id": 2, + "name": "workerNetAddress", + "type": "grpc.WorkerNetAddress" + }, + { + "id": 3, + "name": "options", + "type": "NotifyWorkerIdPOptions" + } + ] + }, + { + "name": "NotifyWorkerIdPResponse" + }, { "name": "GetWorkerIdPOptions" }, @@ -831,6 +857,11 @@ "in_type": "GetWorkerIdPRequest", "out_type": "GetWorkerIdPResponse" }, + { + "name": "NotifyWorkerId", + "in_type": "NotifyWorkerIdPRequest", + "out_type": "NotifyWorkerIdPResponse" + }, { "name": "RegisterWorker", "in_type": "RegisterWorkerPRequest", @@ -8555,6 +8586,11 @@ "id": 2, "name": "length", "type": "int64" + }, + { + "id": 3, + "name": "block_location", + "type": "grpc.BlockLocation" } ] }, @@ -8569,6 +8605,11 @@ ] } ], + "imports": [ + { + "path": "grpc/common.proto" + } + ], "package": { "name": "alluxio.proto.journal" } diff --git a/core/transport/src/main/proto/proto/journal/block.proto b/core/transport/src/main/proto/proto/journal/block.proto index 3a605bde8a7e..0a03eca15196 100644 --- a/core/transport/src/main/proto/proto/journal/block.proto +++ b/core/transport/src/main/proto/proto/journal/block.proto @@ -2,6 +2,8 @@ syntax = "proto2"; package alluxio.proto.journal; +import "grpc/common.proto"; + // Journal entry messages for the block master. 
// next available id: 2 @@ -13,6 +15,7 @@ message BlockContainerIdGeneratorEntry { message BlockInfoEntry { optional int64 block_id = 1; optional int64 length = 2; + optional grpc.BlockLocation block_location = 3; } // next available id: 2 diff --git a/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java b/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java index 08b322bdde4f..10af58bc303b 100644 --- a/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java +++ b/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java @@ -106,6 +106,8 @@ public class PortCoordination { public static final List QUORUM_SHELL_INFO = allocate(3, 0); public static final List QUORUM_SHELL_REMOVE = allocate(5, 0); + public static final List WORKER_ALL_MASTER_REGISTRATION = allocate(3, 1); + private static synchronized List allocate(int numMasters, int numWorkers) { int needed = numMasters * MultiProcessCluster.PORTS_PER_MASTER + numWorkers * MultiProcessCluster.PORTS_PER_WORKER; diff --git a/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java index c71502276819..dab5f3e302ab 100644 --- a/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java @@ -38,6 +38,7 @@ import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; import alluxio.grpc.StorageList; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; @@ -48,6 +49,7 @@ import alluxio.master.block.RegisterStreamObserver; import alluxio.master.block.WorkerRegisterContext; import alluxio.master.block.meta.MasterWorkerInfo; +import alluxio.master.journal.noop.NoopJournalSystem; import 
alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.stress.cli.RpcBenchPreparationUtils; @@ -118,7 +120,9 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_WORKER_REGISTER_LEASE_ENABLED, false); mRegistry = new MasterRegistry(); - mMasterContext = MasterTestUtils.testMasterContext(); + mMasterContext = MasterTestUtils.testMasterContext( + new NoopJournalSystem(), null, new AlwaysPrimaryPrimarySelector() + ); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, mMasterContext); mRegistry.add(MetricsMaster.class, mMetricsMaster); mClock = new ManualClock(); diff --git a/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java new file mode 100644 index 000000000000..d6873b63f72f --- /dev/null +++ b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java @@ -0,0 +1,361 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.server.worker; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.status.UnavailableException; +import alluxio.master.MultiMasterEmbeddedJournalLocalAlluxioCluster; +import alluxio.master.block.BlockMaster; +import alluxio.master.block.DefaultBlockMaster; +import alluxio.multi.process.PortCoordination; +import alluxio.testutils.IntegrationTestUtils; +import alluxio.util.CommonUtils; +import alluxio.util.WaitForOptions; +import alluxio.wire.BlockLocationInfo; +import alluxio.worker.block.AllMasterRegistrationBlockWorker; +import alluxio.worker.block.BlockWorker; +import alluxio.worker.block.SpecificMasterBlockSync; +import alluxio.worker.block.TestSpecificMasterBlockSync; + +import com.google.common.collect.Maps; +import org.apache.commons.io.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +import java.net.InetSocketAddress; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Random; + +public class WorkerAllMasterRegistrationTest { + private MultiMasterEmbeddedJournalLocalAlluxioCluster mCluster; + + @Rule + public TestName mTestName = new TestName(); + + private final List mBlockMasters = new ArrayList<>(); + private AllMasterRegistrationBlockWorker mWorker; + private final int mNumMasters = 3; + private final int mNumWorkers = 1; + + private WaitForOptions mDefaultWaitForOptions = WaitForOptions.defaults().setTimeoutMs(30000); + + @Before + public void before() throws Exception { + mCluster = new 
MultiMasterEmbeddedJournalLocalAlluxioCluster( + mNumMasters, mNumWorkers, PortCoordination.WORKER_ALL_MASTER_REGISTRATION); + mCluster.initConfiguration( + IntegrationTestUtils.getTestName(getClass().getSimpleName(), mTestName.getMethodName())); + Configuration.set(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, 5); + Configuration.set(PropertyKey.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX, 100); + Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); + Configuration.set(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.MUST_CACHE); + Configuration.set(PropertyKey.WORKER_BLOCK_HEARTBEAT_REPORT_SIZE_THRESHOLD, 5); + Configuration.set(PropertyKey.MASTER_JOURNAL_FLUSH_TIMEOUT_MS, "30sec"); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_WRITE_TIMEOUT, "10sec"); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "3s"); + Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "6s"); + + mCluster.start(); + + mWorker = (AllMasterRegistrationBlockWorker) + mCluster.getWorkerProcess(0).getWorker(BlockWorker.class); + for (int i = 0; i < mNumMasters; i++) { + mBlockMasters.add((DefaultBlockMaster) + mCluster.getLocalAlluxioMasterByIndex(i).getMasterProcess().getMaster(BlockMaster.class)); + } + } + + @After + public void after() throws Exception { + mCluster.stop(); + mWorker.stop(); + mWorker = null; + mBlockMasters.clear(); + } + + /** + * Tests a happy path where added and removed blocks can all be reported to standby masters. 
+ */ + @Test + public void happyPath() throws Exception { + CommonUtils.waitFor("wait for worker registration complete", () -> + mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters(), mDefaultWaitForOptions); + + AlluxioURI fileUri = new AlluxioURI("/foobar"); + String fileContent = "foobar"; + + FileOutStream fos = mCluster.getClient().createFile(fileUri); + fos.write(fileContent.getBytes()); + fos.close(); + + FileInStream fis = mCluster.getClient().openFile(fileUri); + assertEquals(fileContent, IOUtils.toString(fis, Charset.defaultCharset())); + + List blockLocations = + mCluster.getClient().getBlockLocations(fileUri); + assertEquals(1, blockLocations.size()); + assertEquals(1, blockLocations.get(0).getLocations().size()); + long blockId = blockLocations.get(0).getBlockInfo().getBlockInfo().getBlockId(); + + // New blocks are added by committing journals + CommonUtils.waitFor("wait for blocks being committed to all masters", () -> + mBlockMasters.stream().allMatch( + it -> it.getBlockMetaStore().getLocations(blockId).size() == 1), + mDefaultWaitForOptions); + + // Removed blocks are reported by worker-master heartbeats + mWorker.removeBlock(new Random().nextLong(), blockId); + CommonUtils.waitFor("wait for blocks being removed to all masters", () -> + mBlockMasters.stream().allMatch( + it -> it.getBlockMetaStore().getLocations(blockId).size() == 0), + mDefaultWaitForOptions); + + assertTrue(mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters()); + + fis = mCluster.getClient().openFile(fileUri); + FileInStream finalFis = fis; + + // Make sure registration only happen once to each master + assertTrue(getBlockSyncOperators().values().stream() + .allMatch(it -> it.getRegistrationSuccessCount() == 1)); + + assertThrows(UnavailableException.class, + () -> IOUtils.toString(finalFis, Charset.defaultCharset())); + } + + /** + * Tests a scenario where the worker to master heartbeat fails. 
+ */ + @Test + public void workerHeartbeatFail() throws Exception { + CommonUtils.waitFor("wait for worker registration complete", () -> + mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters(), mDefaultWaitForOptions); + + // Fails all heartbeats + getBlockSyncOperators().values().forEach(TestSpecificMasterBlockSync::failHeartbeat); + + // Write a file + AlluxioURI fileUri = new AlluxioURI("/foobar"); + String fileContent = "foobar"; + + FileOutStream fos = mCluster.getClient().createFile(fileUri); + fos.write(fileContent.getBytes()); + fos.close(); + + // Committed block is on primary master even if the heartbeat is paused. + List blockLocations = + mCluster.getClient().getBlockLocations(fileUri); + assertEquals(1, blockLocations.size()); + assertEquals(1, blockLocations.get(0).getLocations().size()); + long blockId = blockLocations.get(0).getBlockInfo().getBlockInfo().getBlockId(); + + // Added blocks are replicated to standbys by journals, + // so even if the heartbeat fails, standby are still in sync with primary. 
+ CommonUtils.waitFor("wait for blocks being committed to all masters by heartbeats", + () -> + mBlockMasters.stream().allMatch( + it -> it.getBlockMetaStore().getLocations(blockId).size() == 1), + mDefaultWaitForOptions); + + // Remove a block + mWorker.removeBlock(new Random().nextLong(), blockId); + + // Resume all heartbeats and the block location should be moved on standby masters, + // by heartbeats + getBlockSyncOperators().values().forEach(TestSpecificMasterBlockSync::restoreHeartbeat); + CommonUtils.waitFor("wait for blocks being removed on all masters by heartbeats", + () -> + mBlockMasters.stream().allMatch( + it -> it.getBlockMetaStore().getLocations(blockId).size() == 0), + mDefaultWaitForOptions); + + // Make sure registration only happen once to each master + assertTrue(getBlockSyncOperators().values().stream() + .allMatch(it -> it.getRegistrationSuccessCount() == 1)); + } + + /** + * Tests the master failover case and makes sure the re-registration does not happen, + * on the new elected primary master. 
+   */
+  @Test
+  public void masterFailover() throws Exception {
+    CommonUtils.waitFor("wait for worker registration complete", () ->
+        mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters(), mDefaultWaitForOptions);
+
+    AlluxioURI fileUri = new AlluxioURI("/foobar");
+    String fileContent = "foobar";
+
+    FileOutStream fos = mCluster.getClient().createFile(fileUri);
+    fos.write(fileContent.getBytes());
+    fos.close();
+
+    FileInStream fis = mCluster.getClient().openFile(fileUri);
+    assertEquals(fileContent, IOUtils.toString(fis, Charset.defaultCharset()));
+
+    // Make sure registration only happen once to each master
+    assertTrue(getBlockSyncOperators().values().stream()
+        .allMatch(it -> it.getRegistrationSuccessCount() == 1));
+
+    // Kill the master and let the failover happen
+    int leaderId = mCluster.getLeaderIndex();
+    mCluster.stopLeader();
+    mCluster.waitForPrimaryMasterServing(10000);
+    assertNotEquals(mCluster.getLeaderIndex(), leaderId);
+
+    fis = mCluster.getClient().openFile(fileUri);
+    FileInStream finalFis = fis;
+    // The new elected primary master should be able to serve the request immediately,
+    // because the added block location is replicated by journal.
+    assertEquals(fileContent, IOUtils.toString(finalFis, Charset.defaultCharset()));
+
+    // Make sure no more registration happens
+    assertTrue(getBlockSyncOperators().values().stream()
+        .allMatch(it -> it.getRegistrationSuccessCount() == 1));
+  }
+
+  /**
+   * Tests the worker being able to work and re-register to all masters after its restart.
+   */
+  @Test
+  public void workerRestart() throws Exception {
+    CommonUtils.waitFor("wait for worker registration complete", () ->
+        mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters(), mDefaultWaitForOptions);
+
+    AlluxioURI fileUri = new AlluxioURI("/foobar");
+    String fileContent = "foobar";
+
+    FileOutStream fos = mCluster.getClient().createFile(fileUri);
+    fos.write(fileContent.getBytes());
+    fos.close();
+
+    FileInStream fis = mCluster.getClient().openFile(fileUri);
+    assertEquals(fileContent, IOUtils.toString(fis, Charset.defaultCharset()));
+
+    mCluster.stopWorkers();
+    mCluster.startWorkers();
+
+    mWorker = (AllMasterRegistrationBlockWorker)
+        mCluster.getWorkerProcess(0).getWorker(BlockWorker.class);
+
+    CommonUtils.waitFor("wait for worker registration complete", () ->
+        getBlockSyncOperators().values().stream().allMatch(
+            SpecificMasterBlockSync::isRegistered), mDefaultWaitForOptions);
+
+    fis = mCluster.getClient().openFile(fileUri);
+    assertEquals(fileContent, IOUtils.toString(fis, Charset.defaultCharset()));
+  }
+
+  /**
+   * Tests the worker can re-register with masters if its heartbeat failed too many times,
+   * and the block report becomes too big and takes up too much memory.
+ */ + @Test + public void heartbeatFallsBackToRegister() throws Exception { + CommonUtils.waitFor("wait for worker registration complete", () -> + mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters(), mDefaultWaitForOptions); + + // Create a test file whose block will be removed later + AlluxioURI testFileUri = new AlluxioURI("/foo"); + FileOutStream fos = mCluster.getLocalAlluxioMaster().getClient().createFile( + new AlluxioURI("/foo")); + fos.write("foo".getBytes()); + fos.close(); + List blockLocations = + mCluster.getClient().getBlockLocations(testFileUri); + long testFileBlockId = blockLocations.get(0).getBlockInfo().getBlockInfo().getBlockId(); + + List blockIdsToRemove = new ArrayList<>(); + int numFiles = 10; + // Create 10 files and corresponding 10 blocks. + // These blocks are added to standby masters by journals. + for (int i = 0; i < numFiles; ++i) { + AlluxioURI fileUri = new AlluxioURI("/" + i); + fos = mCluster.getLocalAlluxioMaster().getClient().createFile( + fileUri); + fos.write("foo".getBytes()); + fos.close(); + blockLocations = mCluster.getClient().getBlockLocations(fileUri); + long blockId = blockLocations.get(0).getBlockInfo().getBlockInfo().getBlockId(); + blockIdsToRemove.add(blockId); + } + + // Make heartbeat return fail + getBlockSyncOperators().values().forEach(TestSpecificMasterBlockSync::failHeartbeat); + + // Removing the blocks for these 10 files, + // heartbeat report containing these blocks will be generated during the heartbeat. + // However, the heartbeat RPC to master will not succeed because + // we made the heartbeat fail. + // So the heartbeat report will become larger and larger as we merge the report + // back to the reporter on RPC failures. 
+ Thread fileGenerationThread = new Thread(() -> { + for (long blockId: blockIdsToRemove) { + try { + mWorker.removeBlock(0, blockId); + Thread.sleep(500); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + fileGenerationThread.start(); + + // Registration should trigger after heartbeat throws exceptions, + // because the heartbeat report contains too many block ids and exceeds the + // WORKER_BLOCK_HEARTBEAT_REPORT_CAPACITY_THRESHOLD and + // the worker will trigger re-registration. + CommonUtils.waitFor("wait for re-registration sending the block location info", + () -> getBlockSyncOperators().values().stream() + .allMatch(it -> it.getRegistrationSuccessCount() >= 2), + WaitForOptions.defaults().setTimeoutMs(60000)); + + // Remove one block and resume the heartbeats, + // and make sure all blocks are propagated to all masters after heartbeat resumes; + fileGenerationThread.join(); + getBlockSyncOperators().values().forEach(TestSpecificMasterBlockSync::restoreHeartbeat); + mWorker.removeBlock(1, testFileBlockId); + + // We have removed the test block id + 10 other block ids, + // so the block meta store should not contain any block location + blockIdsToRemove.add(testFileBlockId); + CommonUtils.waitFor("wait for blocks propagated to masters by heartbeats", + () -> mBlockMasters.stream() + .allMatch(it -> + blockIdsToRemove.stream() + .allMatch(blockId -> it.getBlockMetaStore().getLocations(blockId).size() == 0) + ), + mDefaultWaitForOptions); + } + + private Map getBlockSyncOperators() { + return Maps.transformValues(mWorker.getBlockSyncMasterGroup().getMasterSyncOperators(), + it -> (TestSpecificMasterBlockSync) it); + } +} From fa2d16f1a338f98e493ccab3d28413a32629af0b Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Fri, 17 Feb 2023 11:32:28 +0800 Subject: [PATCH 123/334] [DOCFIX] Update cn version of Spark on Kubernetes doc What changes are proposed in this pull request? 
Update cn version of Spark on Kubernetes doc. Why are the changes needed? There is no corresponding Chinese documentation for upgrade. Does this PR introduce any user facing changes? More Chinese users can access Alluxio documentation more easily. pr-link: Alluxio/alluxio#16855 change-id: cid-59e640d1ad5bd270d71546d226fa9f546607cf93 --- docs/cn/kubernetes/Spark-On-Kubernetes.md | 124 ++++++++++------------ 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/docs/cn/kubernetes/Spark-On-Kubernetes.md b/docs/cn/kubernetes/Spark-On-Kubernetes.md index 6c9b38624742..9bbf3413a156 100644 --- a/docs/cn/kubernetes/Spark-On-Kubernetes.md +++ b/docs/cn/kubernetes/Spark-On-Kubernetes.md @@ -1,49 +1,41 @@ --- layout: global -title: Kubernetes环境下在Alluxio上运行Spark +title: 在Kubernetes上通过Alluxio运行Spark nickname: Spark on Kubernetes group: Compute Integrations priority: 1 --- -Alluxio可以在Kubernetes上运行。本指南演示了如何在Kubernetes环境下运行的Alluxio上跑一个Spark作业。 +Alluxio可以在Kubernetes上运行。本指南介绍了如何在Kubernetes环境中利用Alluxio运行Spark作业。 * Table of Contents {:toc} -## 概述 +## 概览 +在Kubernetes上运行Spark时可将Alluxio作为数据访问层。本指南介绍了在Kubernetes中的Alluxio上运行Spark作业的示例。教程中使用的示例是一个计算文件行数的作业。下文中称该作业为`count`。 -在Kubernetes上运行的Spark可以将Alluxio用作数据访问层。 -本指南介绍了Kubernetes环境下在Alluxio上运行Spark作业示例。 -本教程中使用的示例是一个计算一个文件中有多少行的作业。 -在下文中,我们将此作业称为 count。 +## 部署条件 -## 先决条件 - -- Kubernetes集群(版本>=1.8)。 -- Alluxio已部署在Kubernetes集群上。有关如何部署Alluxio的说明,请参考 -[本页]({{ '/en/kubernetes/Running-Alluxio-On-Kubernetes.html' | relativize_url}}) +- 已安装一个Kubernetes集群(版本不低于1.8) +- Alluxio部署在Kubernetes集群上。有关如何部署Alluxio,请参见[此页]({{ '/cn/kubernetes/Running-Alluxio-On-Kubernetes.html' | relativize_url }})。 ## 基本设置 -首先,我们准备一个Spark Docker镜像,其中包括Alluxio客户端和任何其他必需的jar文件。 -在所有Kubernetes节点上都需提供此镜像。 +首先,我们准备一个包含Alluxio client和其他所需jar包的Spark Docker镜像。此镜像应在所有Kubernetes节点上可用。 -### 下载二进制文件 +### 下载Spark软件 +[下载](https://spark.apache.org/downloads.html)所需的Spark版本。我们将预编译的二进制文件用于`spark-submit` 命令,并使用Alluxio中包含的Dockerfile来构建Docker镜像。 
-[下载](https://spark.apache.org/downloads.html)所需的Spark版本。 -对于`spark-submit`命令和使用Alluxio所含的Dockerfile编译Docker镜像 -我们都使用预生成的二进制文件,。 ->注:下载为Hadoop预制的软件包 +> 注:下载用于Hadoop的预编译文件包 ```console $ tar -xf spark-2.4.4-bin-hadoop2.7.tgz $ cd spark-2.4.4-bin-hadoop2.7 ``` -### 编译Spark Docker镜像 +### 构建Spark Docker镜像 -从Alluxio Docker镜像中提取Alluxio客户端jar: +解压Alluxio Docker镜像中的Alluxio client: ```console $ id=$(docker create alluxio/alluxio:{{site.ALLUXIO_VERSION_STRING}}) @@ -52,58 +44,49 @@ $ docker cp $id:/opt/alluxio/client/alluxio-{{site.ALLUXIO_VERSION_STRING}}-clie $ docker rm -v $id 1>/dev/null ``` -添加所需的Alluxio客户端jar并构建用于Spark驱动程序和执行程序pods的Docker镜像。 -从Spark发行版目录运行以下命令以添加Alluxio客户端jar。 +添加所需的Alluxio client jar并构建用于Spark driver和executor pod的Docker镜像。从Spark发行版目录运行以下命令,从而添加Alluxio client jar。 ```console $ cp /alluxio-{{site.ALLUXIO_VERSION_STRING}}-client.jar jars/ ``` ->注意:任何复制到`jars`目录的jar文件在编译时都会被包含到Spark Docker镜像中。 +> 注:任何拷贝到jars目录的jar 文件在构建时都包含在Spark Docker镜像中。 -编译Spark Docker镜像 +构建Spark Docker镜像 ```console $ docker build -t spark-alluxio -f kubernetes/dockerfiles/spark/Dockerfile . 
``` ->注意:确保所有节点(spark-driver和spark-executor pods将运行的所在节点) -都有该镜像。 +> 注:**确保所有(运行spark-driver和spark-executor pod的)节点都包含此镜像。** ## 示例 -本节说明如何使用编译的Docker镜像来发起一个以Alluxio作为数据源的Spark作业。 +本节介绍如何使用构建的Docker镜像来启动一个以Alluxio为数据源的Spark作业。 ### 短路操作 -短路访问使Spark执行器中的Alluxio客户端可以直接访问主机上的Alluxio worker存储。 -因为不通过网络堆栈来与Alluxio worker通信,这样可以提高性能。 +短路访问使得Spark executor中的Alluxio client能够直接访问主机上的Alluxio worker存储,而无需通过网络传输与Alluxio worker通信,因而实现了性能提升。 -如果在部署Alluxio时未按照指令设置domain socket -[本页]({{ '/en/kubernetes/Running-Alluxio-On-Kubernetes.html' | relativize_url}}#short-circuit-access),则 -可以跳过将`hostPath`卷挂载到Spark执行器步骤。 +如果未按照[此页]({{ '/cn/kubernetes/Running-Alluxio-On-Kubernetes.html' | relativize_url }}#enable-short-circuit-access)的说明在部署Alluxio时设置domain socket(域套接字),则可以跳过将`hostPath`卷挂载到Spark executor的操作。 -如果在运行Alluxio worker进程的主机上将domain socket位置设置为 -`/tmp/alluxio-domain`,并且Alluxio配置为`alluxio.worker.data.server.domain.socket.address=/opt/domain`,使用以下Spark -配置将`/tmp/alluxio-domain`挂载到Spark执行器pod中的`/opt/domain`。 -下一节中的`spark-submit`命令包含这些属性。 +如果在运行Alluxio worker进程的主机上将domain socket位置设置成 `/tmp/alluxio-domain` ,而Alluxio配置为 `alluxio.worker.data.server.domain.socket.address=/opt/domain`,则应使用以下Spark配置将 `/tmp/alluxio-domain` 挂载到Spark executor pod上的 `/opt/domain`。下节中提到的`spark-submit`命令将包括这些属性。 -取决于你的设置,Alluxio worker上的domain socket可以是`hostPath`卷或`PersistententVolumeClaim`两种之一。可以再[此处]({{ '/en/kubernetes/Running-Alluxio-On-Kubernetes.html#short-circuit-access' | relativize_url}})找到有关如何配置Alluxio worker以使用短路操作的更多详细信息。 -这两个选项的spark-submit参数将有所不同。 -可以在以下Spark文档中找到有关如何将卷挂载到Spark执行器的更多[信息](https://spark.apache.org/docs/2.4.4/running-on-kubernetes.html#using-kubernetes-volumes)。 +根据设置不同,Alluxio worker上的domain socket可以是`hostPath`卷,也可以是`PersistententVolumeClaim`。有关如何配置Alluxio worker来使用短路读的详细信息,请点击[此处]({{ '/cn/kubernetes/Running-Alluxio-On-Kubernetes.html#short-circuit-access' | relativize_url }})。上述两个选项的spark-submit参数会有所不同。有关如何将卷挂载到 Spark 
executor的详细信息,请参见Spark[文档](https://spark.apache.org/docs/2.4.4/running-on-kubernetes.html#using-kubernetes-volumes)。 {% navtabs domainSocket %} {% navtab hostPath %} - 如果使用的是`hostPath` domain socket,则应将以下属性传递给Spark: - + + 如果您使用的是`hostPath` domain socket,则应将下述属性传递给Spark: + ```properties spark.kubernetes.executor.volumes.hostPath.alluxio-domain.mount.path=/opt/domain spark.kubernetes.executor.volumes.hostPath.alluxio-domain.mount.readOnly=true spark.kubernetes.executor.volumes.hostPath.alluxio-domain.options.path=/tmp/alluxio-domain spark.kubernetes.executor.volumes.hostPath.alluxio-domain.options.type=Directory ``` - {% endnavtab %} {% navtab PersistententVolumeClaim %} - 如果使用的是`PersistententVolumeClaim` domain socket,则应将以下属性传递给Spark: + + 如果您使用的是`PersistententVolumeClaim`domain socket,则应将下述属性传递给Spark: ```properties spark.kubernetes.executor.volumes.persistentVolumeClaim.alluxio-domain.mount.path=/opt/domain \ @@ -114,18 +97,15 @@ $ docker build -t spark-alluxio -f kubernetes/dockerfiles/spark/Dockerfile . 
{% endnavtab %} {% endnavtabs %} -注意: -- Spark中的卷支持是在2.4.0版中添加的。 -- 当不通过domain socket使用短路访问时,可能会观察到性能下降。 +> 注: +> - Spark 2.4.0版本中新增了卷支持。 +> - 当不通过domain socket使用短路访问时可能会出现性能下降。 ### 运行Spark作业 -以下命令在Alluxio位置`/LICENSE`运行一个计字数作业样例。 -可以在Spark驱动程序pod的日志中看到运行的输出和所花费的时间。更进一步[说明参考Spark](https://spark.apache.org/docs/latest/running-on-kubernetes.html)。 - -#### 创建服务帐户(可选) +#### 创建服务账户(可选) -如果没有可使用的服务帐户,可以按如下指令创建一个具有所需访问权限的服务账户来运行spark作业。 +如果您没有服务帐户可用,可创建一个具有所需访问权限的服务帐户来运行spark作业,如下所示: ```console $ kubectl create serviceaccount spark @@ -135,10 +115,13 @@ $ kubectl create clusterrolebinding spark-role --clusterrole=edit \ #### 提交Spark作业 -从Spark发行版目录运行Spark作业 +下述命令在Alluxio `/LICENSE`位置运行字数统计作业。请确保此文件存在于您的Alluxio集群中,或者将路径更改为已存在的文件。 + +您可以在Spark driver pod的日志中看到输出和所用时间。有关在Kubernetes上运行Spark的更多详细信息,请参阅Spark[文档](https://spark.apache.org/docs/latest/running-on-kubernetes.html)。比如,点击[此处](https://spark.apache.org/docs/latest/running-on-kubernetes.html?q=cluster-info#cluster-mode)可查看该命令中使用的部分flag的详细信息。 +从Spark发行版目录运行Spark作业 ```console -$ ./bin/spark-submit --master k8s://https://:8443 \ +$ ./bin/spark-submit --master k8s://https://:6443 \ --deploy-mode cluster --name spark-alluxio --conf spark.executor.instances=1 \ --class org.apache.spark.examples.JavaWordCount \ --driver-memory 500m --executor-memory 1g \ @@ -151,41 +134,42 @@ $ ./bin/spark-submit --master k8s://https://:8443 \ local:///opt/spark/examples/jars/spark-examples_2.11-2.4.4.jar \ alluxio://:19998/LICENSE ``` -> 注意:可以通过运行`kubectl cluster-info`找到Kubernetes API服务器地址。 -您可以在Spark[文档](https://spark.apache.org/docs/latest/running-on-kubernetes.html?q=cluster-info#cluster-mode)中找到更多详细信息。 -你应该使用与你的domain socket卷类型相应的属性 -[domain socket卷类型]({{ '/en/kubernetes/Spark-On-Kubernetes.html#short-circuit-operations' | relativize_url}}。 -## 故障排除 +> 注: +> - 您可通过运行`kubectl cluster-info`找到Kubernetes API服务器的地址和端口。 +> - 默认的 Kubernetes API 服务器端口为 6443,但可能会因集群配置而异 +> - 建议将此命令中的 `` 主机名设置为Alluxio master的Kubernetes服务名(例如,`alluxio-master-0`)。 +> - 
如果您使用的是不同版本的 Spark,请确保根据Spark 版本正确设置`spark-examples_2.11-2.4.4.jar`的路径 +> - 此外,应注意确保卷属性与[domain socket卷类型]({{ '/cn/kubernetes/Spark-On-Kubernetes.html#short-circuit-operations' | relativize_url }})一致。 -### 访问Alluxio客户端日志 +## 故障排查 -可在Spark驱动和执行器日志中找到Alluxio客户端日志。 -有关更多说明参考[Spark文档](https://spark.apache.org/docs/latest/running-on-kubernetes.html#debugging) +### 访问Alluxio Client日志 -### Kubernetes客户端上的HTTP 403 +Alluxio client日志可以在Spark driver和executor日志中查看。详细说明请参见[Spark文档](https://spark.apache.org/docs/latest/running-on-kubernetes.html#debugging)。 -如果你的Spark作业因Kubernetes客户端中如下错误而失败: + +### Kubernetes client上出现HTTP 403错误 + +如果您的Spark作业由于Kubernetes client故障而运行失败,如下所示: ``` WARN ExecutorPodsWatchSnapshotSource: Kubernetes client has been closed ... ERROR SparkContext: Error initializing SparkContext. io.fabric8.kubernetes.client.KubernetesClientException ``` - -这可能是由于一个[已知问题](https://issues.apache.org/jira/browse/SPARK-28921)导致的,可以通过将`kubernetes- client.jar`升级至4.4.x来解决。 -您可以在编译`spark-alluxio`镜像之前通过更新`kubernetes-client-xxjar`来修补docker镜像。 +这可能是由一个[已知问题](https://issues.apache.org/jira/browse/SPARK-28921)导致,该问题可以通过将 `kubernetes-client.jar`升级到4.4.x来解决。您可以在构建`spark-alluxio`镜像之前通过更新`kubernetes-client-x.x.jar`来修补docker镜像。 ```console rm spark-2.4.4-bin-hadoop2.7/jars/kubernetes-client-*.jar wget https://repo1.maven.org/maven2/io/fabric8/kubernetes-client/4.4.2/kubernetes-client-4.4.2.jar cp kubernetes-client-4.4.2.jar spark-2.4.4-bin-hadoop2.7/jars ``` -然后编译`spark-alluxio`镜像,并分发到所有节点。 +然后构建`spark-alluxio`镜像并分发到所有节点。 ### 服务帐户没有访问权限 -如果你看到类似以下某些操作被禁止的错误,这是因为用于Spark作业服务帐户没有足够的访问权限来执行操作引起的。 +如果您看到某些操作被禁止的错误(如下所示),那是因为用于spark作业的服务帐户没有足够的访问权限来执行该操作。 ``` ERROR Utils: Uncaught exception in thread main @@ -196,4 +180,4 @@ pods "spark-alluxiolatest-exec-1" is forbidden: User "system:serviceaccount:defa cannot delete resource "pods" in API group "" in the namespace "default". 
``` -你应该参考[创建服务帐户]({{ '/en/kubernetes/Spark-On-Kubernetes.html#create-the-service-account-optional' | relativize_url}}确保有正确访问权限。 +您应该通过[创建服务帐户]({{ '/cn/kubernetes/Spark-On-Kubernetes.html#create-the-service-account-optional' | relativize_url }})来确保账户具有合理的访问权限。 From 987feaf725e3c323c1be15bd244fd19ef9d99fac Mon Sep 17 00:00:00 2001 From: humengyu Date: Fri, 17 Feb 2023 22:10:55 +0800 Subject: [PATCH 124/334] [SMALLFIX] Correct version name in comment ### What changes are proposed in this pull request? Rename PROXY_S3_OPTIMIZED_VERSION_ENABLED to PROXY_S3_V2_VERSION_ENABLED in comment. ### Why are the changes needed? Using a error property about how to enable the s3 v2 api in `alluxio.proxy.s3.S3RequestServlet`. pr-link: Alluxio/alluxio#16896 change-id: cid-b10e399e4d94511c1d0c134fe5ab171c95a8f1c8 --- .../proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java index 78ace32a0a4b..40c192a0b26b 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java @@ -42,7 +42,7 @@ public class S3RequestServlet extends HttpServlet { public static final String S3_V2_SERVICE_PATH_PREFIX = Constants.REST_API_PREFIX + AlluxioURI.SEPARATOR + SERVICE_PREFIX; private static final Logger LOG = LoggerFactory.getLogger(S3RequestServlet.class); - /* (Experimental for new architecture enabled by PROXY_S3_OPTIMIZED_VERSION_ENABLED) + /* (Experimental for new architecture enabled by PROXY_S3_V2_VERSION_ENABLED) * Processing threadpools for group of requests (for now, distinguish between * light-weighted metadata-centric requests and heavy io requests */ public static final String PROXY_S3_V2_LIGHT_POOL = "Proxy S3 V2 Light Pool"; From 817a3c43a9602d4297dee8a4099ee83136bc1234 Mon Sep 17 
00:00:00 2001 From: Haoning Sun Date: Tue, 21 Feb 2023 14:14:26 +0800 Subject: [PATCH 125/334] [SMALLFIX] Fix a few typos in comment ### What changes are proposed in this pull request? Fix typo. ### Why are the changes needed? NA ### Does this PR introduce any user facing changes? NA pr-link: Alluxio/alluxio#16917 change-id: cid-4bd1a54d173b081b75c97b7132c03f492d8b748e --- .../main/java/alluxio/master/file/meta/MountTable.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java index ee5a8858de08..fb4397ad3df3 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java @@ -99,7 +99,7 @@ public MountTable(UfsManager ufsManager, MountInfo rootMountInfo, Clock clock) { } /** - * Returns the underlying writelock of the MountTable. This method will be called when + * Returns the underlying write lock of the MountTable. This method will be called when * fileSystemMaster is adding a new MountPoint. * * @return the write lock of the mountTable @@ -160,7 +160,7 @@ public void addValidated(Supplier journalContext, * Verify if the given (alluxioPath, ufsPath) can be inserted into MountTable. This method is * NOT ThreadSafe. This method will not acquire any locks, so the caller MUST apply the lock * first before calling this method. 
- * @param alluxioUri the alluxio path that is about to be the mountpoint + * @param alluxioUri the alluxio path that is about to be the mount point * @param ufsUri the UFS path that is about to mount * @param mountId the mount id * @param options the mount options @@ -257,7 +257,7 @@ public boolean delete(Supplier journalContext, AlluxioURI uri, for (String mountPath : mState.getMountTable().keySet()) { try { if (PathUtils.hasPrefix(mountPath, path) && (!path.equals(mountPath))) { - LOG.warn("The path to unmount {} contains another nested mountpoint {}", + LOG.warn("The path to unmount {} contains another nested mount point {}", path, mountPath); return false; } @@ -324,7 +324,7 @@ public String getMountPoint(AlluxioURI uri) throws InvalidPathException { try (LockResource r = new LockResource(mReadLock)) { for (Map.Entry entry : mState.getMountTable().entrySet()) { String mount = entry.getKey(); - // we choose a new candidate path if the previous candidatepath is a prefix + // we choose a new candidate path if the previous candidate path is a prefix // of the current alluxioPath and the alluxioPath is a prefix of the path if (!mount.equals(ROOT) && PathUtils.hasPrefix(path, mount) && lastMount.length() < mount.length()) { From d28e9337247fef7798e0e10c8058271780e39d9d Mon Sep 17 00:00:00 2001 From: Nandeeshvar Porko Pandiyan <47167374+nand-porko@users.noreply.github.com> Date: Tue, 21 Feb 2023 01:16:05 -0500 Subject: [PATCH 126/334] [SMALLFIX] Fix Alluxio/new-contributor-tasks#624 Fixes Alluxio/new-contributor-tasks#624 https://github.com/Alluxio/new-contributor-tasks/issues/624 pr-link: Alluxio/alluxio#16871 change-id: cid-1c61908eb034cdb6d84b5510798ac8c2c7c029e9 --- .../main/java/alluxio/client/block/BlockStoreClient.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java b/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java index 
337cc64e1ccf..65221df05e2a 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/BlockStoreClient.java @@ -125,7 +125,7 @@ public BlockInStream getInStream(long blockId, InStreamOptions options) throws I * * @param blockId the id of the block to read * @param options the options associated with the read request - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of worker's addresses to most recent failure time * @return a stream which reads from the beginning of the block */ public BlockInStream getInStream(long blockId, InStreamOptions options, @@ -140,7 +140,7 @@ public BlockInStream getInStream(long blockId, InStreamOptions options, * * @param info the block info * @param options the options associated with the read request - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of worker's addresses to most recent failure time * @return a stream which reads from the beginning of the block */ public BlockInStream getInStream(BlockInfo info, InStreamOptions options, @@ -169,7 +169,7 @@ public BlockInStream getInStream(BlockInfo info, InStreamOptions options, * @param info the info of the block to read * @param status the URIStatus associated with the read request * @param policy the policy determining the Alluxio worker location - * @param failedWorkers the map of workers addresses to most recent failure time + * @param failedWorkers the map of worker's addresses to most recent failure time * @return the data source and type of data source of the block */ public Pair getDataSourceAndType(BlockInfo info, From 16ff653d639d46a3b072b3c0c42a09c914d5e584 Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Tue, 21 Feb 2023 15:10:32 +0800 Subject: [PATCH 127/334] Support overwrite option in createFile ### What changes 
are proposed in this pull request? Support overwrite option in createFile ### Why are the changes needed? before this change, if we have the same name file existing in Alluxio, we will try `getStatus`, `deleteFile`, and then create the new one. now we just need to call `createFile` with overwrite option. no matter in hdfs api or s3 api. Excessive RPCs are saved. ### Does this PR introduce any user facing changes? add a new option in CreateFileOption for overwriting. pr-link: Alluxio/alluxio#16886 change-id: cid-5b84132d9c4da731b7d1bbf35d71885052e8c5b0 --- .../alluxio/hadoop/AbstractFileSystem.java | 22 ++---------- .../hadoop/AbstractFileSystemTest.java | 7 ++-- .../alluxio/exception/ExceptionMessage.java | 3 ++ .../master/file/DefaultFileSystemMaster.java | 36 +++++++++++++++++++ .../file/FileSystemMasterFsOptsTest.java | 29 +++++++++++++++ .../java/alluxio/proxy/s3/S3ObjectTask.java | 15 +++----- .../proxy/s3/S3RestServiceHandler.java | 14 ++------ .../main/proto/grpc/file_system_master.proto | 1 + core/transport/src/main/proto/proto.lock | 5 +++ .../CopyFromLocalCommandIntegrationTest.java | 11 +++--- .../fs/command/CpCommandIntegrationTest.java | 4 ++- 11 files changed, 99 insertions(+), 48 deletions(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java index 9d11dd09a6d2..41bdb08302e8 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java @@ -60,7 +60,6 @@ import java.security.AccessControlContext; import java.security.AccessController; import java.security.PrivilegedExceptionAction; -import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -172,29 +171,14 @@ public FSDataOutputStream create(Path path, FsPermission permission, boolean ove AlluxioURI uri = getAlluxioPath(path); CreateFilePOptions 
options = CreateFilePOptions.newBuilder().setBlockSizeBytes(blockSize) - .setMode(new Mode(permission.toShort()).toProto()).setRecursive(true).build(); + .setMode(new Mode(permission.toShort()).toProto()).setRecursive(true) + .setOverwrite(overwrite).build(); FileOutStream outStream; try { outStream = mFileSystem.createFile(uri, options); } catch (AlluxioException e) { - //now we should consider the override parameter - try { - if (mFileSystem.exists(uri)) { - if (!overwrite) { - throw new IOException( - "Not allowed to create() (overwrite=false) for existing Alluxio path: " + uri); - } - if (mFileSystem.getStatus(uri).isFolder()) { - throw new IOException(MessageFormat - .format("{0} already exists. Directories cannot be overwritten with create", uri)); - } - mFileSystem.delete(uri); - } - outStream = mFileSystem.createFile(uri, options); - } catch (AlluxioException e2) { - throw new IOException(e2); - } + throw new IOException(e); } return new FSDataOutputStream(outStream, mStatistics); } diff --git a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java index 7ccba889e7a0..158643a9a84e 100644 --- a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java +++ b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java @@ -37,6 +37,7 @@ import alluxio.client.file.URIStatus; import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; import alluxio.util.ConfigurationUtils; import alluxio.wire.BlockInfo; @@ -665,13 +666,15 @@ public void createWithoutOverwrite() throws Exception { when(alluxioFs.exists(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) .thenReturn(true); when(alluxioFs.createFile(eq(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path))), any())) - .thenThrow(new 
FileAlreadyExistsException(path.toString())); + .thenThrow(new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path.toString()))); try (FileSystem alluxioHadoopFs = new FileSystem(alluxioFs)) { alluxioHadoopFs.create(path, false, 100, (short) 1, 1000); fail("create() of existing file is expected to fail"); } catch (IOException e) { - assertEquals("Not allowed to create() (overwrite=false) for existing Alluxio path: " + path, + assertEquals("alluxio.exception.FileAlreadyExistsException: " + + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path), e.getMessage()); } } diff --git a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java index 15ad6383f2e0..8723ed1b0b19 100644 --- a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java +++ b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java @@ -95,6 +95,9 @@ public enum ExceptionMessage { ROOT_CANNOT_BE_RENAMED("The root directory cannot be renamed"), JOURNAL_ENTRY_MISSING( "Journal entries are missing between sequence number {0} (inclusive) and {1} (exclusive)."), + CANNOT_OVERWRITE_DIRECTORY("{0} already exists. Directories cannot be overwritten with create"), + CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE("{0} already exists. 
If you want to overwrite the file," + + " you need to specify the overwrite option."), // block master NO_WORKER_FOUND("No worker with workerId {0,number,#} is found"), diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 062623fd5c5e..b82e61513696 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1870,6 +1870,7 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) // Check if ufs is writable checkUfsMode(path, OperationType.WRITE); } + deleteFileIfOverwrite(rpcContext, inodePath, context); createFileInternal(rpcContext, inodePath, context); auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true); cacheOperation(context); @@ -1878,6 +1879,41 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) } } + /** + * @param rpcContext the rpc context + * @param inodePath the path to be created + * @param context the method context + */ + private void deleteFileIfOverwrite(RpcContext rpcContext, LockedInodePath inodePath, + CreateFileContext context) + throws FileDoesNotExistException, IOException, InvalidPathException, + FileAlreadyExistsException { + if (inodePath.fullPathExists()) { + Inode currentInode = inodePath.getInode(); + if (!context.getOptions().hasOverwrite() || !context.getOptions().getOverwrite()) { + throw new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage( + inodePath.getUri())); + } + // if the fullpath is a file and the option is to overwrite, delete it + if (currentInode.isDirectory()) { + throw new FileAlreadyExistsException( + ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(inodePath.getUri())); + } else { + try { + deleteInternal(rpcContext, inodePath, DeleteContext.mergeFrom( + 
DeletePOptions.newBuilder().setRecursive(true) + .setAlluxioOnly(!context.isPersisted())), true); + inodePath.removeLastInode(); + } catch (DirectoryNotEmptyException e) { + // Should not reach here + throw new InvalidPathException( + ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(inodePath.getUri())); + } + } + } + } + /** * @param rpcContext the rpc context * @param inodePath the path to be created diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java index 4eacaa95b0cc..62f93ecf559d 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterFsOptsTest.java @@ -14,6 +14,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -105,6 +106,34 @@ public void createFileUsesOperationTime() throws Exception { assertEquals(100, info.getLastAccessTimeMs()); } + @Test + public void createFileWithOverwrite() throws Exception { + AlluxioURI path = new AlluxioURI("/test"); + mFileSystemMaster.createFile(path, CreateFileContext.defaults()); + // create without overwrite + Exception e = assertThrows(FileAlreadyExistsException.class, () -> { + mFileSystemMaster.createFile(path, CreateFileContext.defaults()); + }); + assertTrue(e.getMessage() + .contains(ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path))); + + // create with overwrite + CreateFileContext createFileContextWithOverwrite = CreateFileContext.defaults(); + createFileContextWithOverwrite.getOptions().setOverwrite(true); + mFileSystemMaster.createFile(path, createFileContextWithOverwrite); + FileInfo info = 
mFileSystemMaster.getFileInfo(path, GetStatusContext.defaults()); + + // overwrite an existed directory + AlluxioURI testpath = new AlluxioURI("/test2"); + mFileSystemMaster.createDirectory(testpath, CreateDirectoryContext.defaults()); + + e = assertThrows(FileAlreadyExistsException.class, () -> { + mFileSystemMaster.createFile(testpath, createFileContextWithOverwrite); + }); + assertTrue(e.getMessage() + .contains(ExceptionMessage.CANNOT_OVERWRITE_DIRECTORY.getMessage(testpath))); + } + /** * Tests the {@link FileSystemMaster#delete(AlluxioURI, DeleteContext)} method. */ diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index faf232def7c1..4a0af6e33273 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -454,6 +454,7 @@ public Response continueTask() { .setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap) .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true) .build(); try { @@ -560,11 +561,6 @@ public Response createObject(String objectPath, FileSystem userFs, } else { toRead = Long.parseLong(contentLength); } - try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); - } FileOutStream os = userFs.createFile(objectUri, createFilePOptions); try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5)) { long read = ByteStreams.copy(ByteStreams.limit(readStream, toRead), @@ -657,11 +653,6 @@ public String copyObject(FileSystem userFs, S3AuditContext auditContext, throw new S3Exception("Copying an object to itself invalid.", targetPath, S3ErrorCode.INVALID_REQUEST); } - try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw 
S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); - } try (FileInStream in = userFs.openFile(new AlluxioURI(sourcePath)); FileOutStream out = userFs.createFile(objectUri, copyFilePOption)) { MessageDigest md5 = MessageDigest.getInstance("MD5"); @@ -726,6 +717,7 @@ public Response continueTask() { .setOtherBits(Bits.NONE).build()) .setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true) .build(); return createObject(objectPath, userFs, filePOptions, auditContext); } @@ -794,7 +786,8 @@ public Response continueTask() { .setMode(PMode.newBuilder() .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) - .setOtherBits(Bits.NONE).build()); + .setOtherBits(Bits.NONE).build()) + .setOverwrite(true); String entityTag = copyObject(userFs, auditContext, objectPath, copySource, copyFilePOptionsBuilder.build()); return new CopyPartResult(entityTag); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 81277e50e7e0..83c3d00d84e7 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -819,6 +819,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setOtherBits(Bits.NONE).build()) .setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true) .build(); // not copying from an existing file @@ -838,11 +839,6 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin } else { toRead = Long.parseLong(contentLength); } - try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); - } FileOutStream 
os = userFs.createFile(objectUri, filePOptions); try (DigestOutputStream digestOutputStream = new DigestOutputStream(os, md5)) { long read = ByteStreams.copy(ByteStreams.limit(readStream, toRead), @@ -893,7 +889,8 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setMode(PMode.newBuilder() .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) - .setOtherBits(Bits.NONE).build()); + .setOtherBits(Bits.NONE).build()) + .setOverwrite(true); // Handle metadata directive if (metadataDirective == S3Constants.Directive.REPLACE && filePOptions.getXattrMap().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { @@ -940,11 +937,6 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin throw new S3Exception("Copying an object to itself invalid.", objectPath, S3ErrorCode.INVALID_REQUEST); } - try { - S3RestUtils.deleteExistObject(userFs, objectUri); - } catch (IOException | AlluxioException e) { - throw S3RestUtils.toObjectS3Exception(e, objectUri.getPath(), auditContext); - } try (FileInStream in = userFs.openFile(new AlluxioURI(copySource)); FileOutStream out = userFs.createFile(objectUri, copyFilePOptionsBuilder.build())) { MessageDigest md5 = MessageDigest.getInstance("MD5"); diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 58000808ceb7..11409ac5a83b 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -138,6 +138,7 @@ message CreateFilePOptions { optional int64 persistenceWaitTime = 10; map xattr = 11; optional XAttrPropagationStrategy xattrPropStrat = 12 [default = NEW_PATHS]; + optional bool overwrite = 13; } message CreateFilePRequest { /** the path of the file */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 36c4cd273b71..46d8959a7381 100644 --- 
a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2449,6 +2449,11 @@ "value": "NEW_PATHS" } ] + }, + { + "id": 13, + "name": "overwrite", + "type": "bool" } ], "maps": [ diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/CopyFromLocalCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/CopyFromLocalCommandIntegrationTest.java index e15d86e69eb0..d648043b191a 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/command/CopyFromLocalCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/command/CopyFromLocalCommandIntegrationTest.java @@ -24,6 +24,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; import alluxio.grpc.DeletePOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.ReadPType; @@ -196,7 +197,8 @@ public void copyFromLocalOverwrite() throws Exception { String[] cmd2 = {"copyFromLocal", testFile2.getPath(), alluxioFilePath.getPath()}; Assert.assertEquals(-1, sFsShell.run(cmd2)); Assert.assertThat(mOutput.toString(), containsString( - "Not allowed to create file because path already exists: " + alluxioFilePath.getPath())); + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage( + alluxioFilePath.getPath()))); // Make sure the original file is intact Assert.assertTrue(BufferUtils .equalIncreasingByteArray(LEN1, readContent(alluxioFilePath, LEN1))); @@ -235,15 +237,16 @@ public void copyFromLocalMustCacheThenCacheThrough() throws Exception { File file = mTestFolder.newFile(); try (Closeable c = new ConfigurationRule(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.MUST_CACHE.toString(), Configuration.modifiableGlobal()).toResource()) { - Assert.assertEquals(0, sFsShell.run("copyFromLocal", file.getAbsolutePath(), "/")); + Assert.assertEquals(0, sFsShell.run("copyFromLocal", file.getAbsolutePath(), "/test")); } try 
(Closeable c = new ConfigurationRule(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.CACHE_THROUGH.toString(), Configuration.modifiableGlobal()).toResource()) { mOutput.reset(); - sFsShell.run("copyFromLocal", file.getAbsolutePath(), "/"); + sFsShell.run("copyFromLocal", file.getAbsolutePath(), "/test"); } Assert.assertThat(mOutput.toString(), - containsString("Not allowed to create file because path already exists")); + containsString( + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage("/test"))); } @Test diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/CpCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/CpCommandIntegrationTest.java index 5ad05aa9150e..931110acc37d 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/command/CpCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/command/CpCommandIntegrationTest.java @@ -25,6 +25,7 @@ import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.ReadPType; @@ -505,7 +506,8 @@ public void copyFromLocalOverwrite() throws Exception { String[] cmd2 = {"cp", "file://" + testFile2.getPath(), alluxioFilePath.getPath()}; Assert.assertEquals(-1, sFsShell.run(cmd2)); Assert.assertThat(mOutput.toString(), containsString( - "Not allowed to create file because path already exists: " + alluxioFilePath.getPath())); + ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage( + alluxioFilePath.getPath()))); // Make sure the original file is intact Assert.assertTrue(BufferUtils .equalIncreasingByteArray(LEN1, readContent(alluxioFilePath, LEN1))); From ea9d10c37642888aec1d7dfd345433ef1b65670c Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Tue, 21 Feb 2023 15:16:20 +0800 Subject: [PATCH 128/334] Fix typo in S3Exception ### 
What changes are proposed in this pull request? Fix typo. pr-link: Alluxio/alluxio#16751 change-id: cid-ef5fa5eab7cbfa6e424d514b9745c0cf41fb5a98 --- .../src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 83c3d00d84e7..d1f4007fed03 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -287,7 +287,7 @@ public Response getBucket(@PathParam("bucket") final String bucket, if (policyStatus != null) { throw new S3Exception(bucket, new S3ErrorCode( S3ErrorCode.INTERNAL_ERROR.getCode(), - "GetBucketpolicyStatus is not currently supported.", + "GetBucketPolicyStatus is not currently supported.", S3ErrorCode.INTERNAL_ERROR.getStatus())); } From f90ccaba86b4b4b8cb3761ef1cfa6d163dab30a4 Mon Sep 17 00:00:00 2001 From: yiichan Date: Tue, 21 Feb 2023 15:18:41 +0800 Subject: [PATCH 129/334] Add WorkerInfoField test in CapacityCommandTest ### What changes are proposed in this pull request? As the title. If Options has a WorkerInfoField which POptions does not have, or if POptions has a WorkerInfoField which Options does not have, this test will be failed. The latest test version can not check these two error, as shown in the PR below. ### Why are the changes needed? [Add missing variant in gRPC WorkerInfoField](https://github.com/Alluxio/alluxio/pull/16457) ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#16507 change-id: cid-23fa04dbd9b0402bff37ddb8ae42f5ec6e18f719 --- .../options/GetWorkerReportOptionsTest.java | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java diff --git a/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java b/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java new file mode 100644 index 000000000000..2f19e60e93d6 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/block/options/GetWorkerReportOptionsTest.java @@ -0,0 +1,43 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.options; + +import alluxio.grpc.WorkerInfoField; +import alluxio.grpc.WorkerRange; + +import org.junit.Assert; +import org.junit.Test; + +public class GetWorkerReportOptionsTest { + /** + * Check whether WorkerInfoField class and WorkerInfoField in proto file has identical fields. 
+ */ + @Test + public void identicalFieldsForWorkerInfoAndRange() { + for (GetWorkerReportOptions.WorkerInfoField field : + GetWorkerReportOptions.WorkerInfoField.values()) { + Assert.assertEquals(field, GetWorkerReportOptions + .WorkerInfoField.fromProto(field.toProto())); + } + for (GetWorkerReportOptions.WorkerRange range : GetWorkerReportOptions.WorkerRange.values()) { + Assert.assertEquals(range, GetWorkerReportOptions.WorkerRange.fromProto(range.toProto())); + } + + for (WorkerInfoField field : WorkerInfoField.values()) { + Assert.assertEquals(field, + GetWorkerReportOptions.WorkerInfoField.fromProto(field).toProto()); + } + for (WorkerRange range : WorkerRange.values()) { + Assert.assertEquals(range, GetWorkerReportOptions.WorkerRange.fromProto(range).toProto()); + } + } +} From f9cb6df62777fc5c9272950dbcd48bec3b0cee4f Mon Sep 17 00:00:00 2001 From: yangchenye Date: Tue, 21 Feb 2023 01:19:09 -0600 Subject: [PATCH 130/334] Add local read benchmarks for PagedBlockStore ### What changes are proposed in this pull request? Added benchmarks for `PagedBlockStore` that read from local storage rather than UFS. ### Why are the changes needed? This piece is missing as `PagedBlockStore` didn't support creating local blocks then. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#16804 change-id: cid-5e0226beb45ae714cae4417c43076f0857cdf7c6 --- .../src/main/java/alluxio/worker/BlockStoreBase.java | 7 ++++++- .../alluxio/worker/BlockStoreRandomReadBench.java | 12 ++++++++++++ .../worker/BlockStoreSequentialReadBench.java | 12 ++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/microbench/src/main/java/alluxio/worker/BlockStoreBase.java b/microbench/src/main/java/alluxio/worker/BlockStoreBase.java index 7b0170ae79cc..fcb4bdb9194c 100644 --- a/microbench/src/main/java/alluxio/worker/BlockStoreBase.java +++ b/microbench/src/main/java/alluxio/worker/BlockStoreBase.java @@ -124,7 +124,12 @@ public void prepareLocalBlock(long blockId, long blockSize, byte[] data) } mMonoBlockStore.commitBlock(1, blockId, false); - // todo(yangchen): create local block for PagedBlockStore + mPagedBlockStore.createBlock(1, blockId, 0, + new CreateBlockOptions(null, null, blockSize)); + try (BlockWriter writer = mPagedBlockStore.createBlockWriter(1, blockId)) { + writer.append(ByteBuffer.wrap(data)); + } + mPagedBlockStore.commitBlock(1, blockId, false); } /** diff --git a/microbench/src/main/java/alluxio/worker/BlockStoreRandomReadBench.java b/microbench/src/main/java/alluxio/worker/BlockStoreRandomReadBench.java index f3d471bc81d2..bfbec647c903 100644 --- a/microbench/src/main/java/alluxio/worker/BlockStoreRandomReadBench.java +++ b/microbench/src/main/java/alluxio/worker/BlockStoreRandomReadBench.java @@ -145,6 +145,18 @@ public void monoBlockStoreRandReadUfs(RandomReadParams params) throws Exception params.mUfsMountId, params.mUfsPath, params.mBlockSize, params.mOffsets, params.mReadSize); } + @Benchmark + public void pagedBlockStoreRandReadLocal(RandomReadParams params) throws Exception { + randReadLocal(params.mBlockStoreBase.mPagedBlockStore, + params.mLocalBlockId, params.mBlockSize, params.mOffsets, params.mReadSize); + } + + @Benchmark + public void pagedBlockStoreRandTransferLocal(RandomReadParams params) 
throws Exception { + randTransferLocal(params.mBlockStoreBase.mPagedBlockStore, + params.mLocalBlockId, params.mBlockSize, params.mOffsets, params.mReadSize); + } + @Benchmark public void pagedBlockStoreRandReadUfs(RandomReadParams params) throws Exception { randReadUfs(params.mBlockStoreBase.mPagedBlockStore, params.mUfsBlockId, diff --git a/microbench/src/main/java/alluxio/worker/BlockStoreSequentialReadBench.java b/microbench/src/main/java/alluxio/worker/BlockStoreSequentialReadBench.java index e34bfe177c42..d2a0face43ff 100644 --- a/microbench/src/main/java/alluxio/worker/BlockStoreSequentialReadBench.java +++ b/microbench/src/main/java/alluxio/worker/BlockStoreSequentialReadBench.java @@ -133,6 +133,18 @@ public void monoBlockStoreTransferLocal(BlockStoreParams params) throws Exceptio params.mLocalBlockId, params.mBlockSizeByte); } + @Benchmark + public void pagedBlockStoreReadLocal(BlockStoreParams params) throws Exception { + readFullyLocal(params.mBlockStoreBase.mPagedBlockStore, + params.mLocalBlockId, params.mBlockSizeByte); + } + + @Benchmark + public void pagedBlockStoreTransferLocal(BlockStoreParams params) throws Exception { + transferFullyLocal(params.mBlockStoreBase.mPagedBlockStore, + params.mLocalBlockId, params.mBlockSizeByte); + } + /** * Use {@link BlockReader#read} to read all block cached locally to memory. * This method simulates {@link alluxio.worker.grpc.BlockReadHandler}'s use of BlockStore From 73f3ce83c8a3ef77ac3eebb4579bb7d412784ec9 Mon Sep 17 00:00:00 2001 From: ssyssy Date: Tue, 21 Feb 2023 01:53:50 -0800 Subject: [PATCH 131/334] Add metrics sink to job master ### What changes are proposed in this pull request? Adding metrics sink to job master. ### Why are the changes needed? Fix the issue that the job master is unable to sink metrics. ### Does this PR introduce any user facing changes? Yes, this change will enable users to sink metrics from the job master. 
pr-link: Alluxio/alluxio#16899 change-id: cid-7394471270d4617007eeb97e1674b90585337624 --- job/server/src/main/java/alluxio/master/job/JobMaster.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index fc14d0935f02..ae99321ca928 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -184,6 +184,10 @@ public long getNewJobId() { @Override public void start(Boolean isLeader) throws IOException { super.start(isLeader); + + // Start serving metrics system, this will not block + MetricsSystem.startSinks(Configuration.getString(PropertyKey.METRICS_CONF_FILE)); + // Fail any jobs that were still running when the last job master stopped. for (PlanCoordinator planCoordinator : mPlanTracker.coordinators()) { if (!planCoordinator.isJobFinished()) { From f0e9a5da89c7d763ae5dc3c2a4bed3fa042316b4 Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 22 Feb 2023 10:22:16 +0800 Subject: [PATCH 132/334] Add rate limit for s3 proxy ### What changes are proposed in this pull request? Add read limit for s3 proxy when getObject return files. ### Why are the changes needed? We use NVME to speed up reading algorithm model, but we find that the reading speed of alluxio is too fast and k8s container will consume a lot of network card resources, and then affect other containers of the same host, so we need to limit the reading speed. ### Does this PR introduce any user facing changes? Add two properties: 1. `alluxio.proxy.s3.global.read.rate.limit.mb` to limit all connections rate; 2. `alluxio.proxy.s3.single.connection.read.rate.limit.mb` to limit single connection rate. 
pr-link: Alluxio/alluxio#16866 change-id: cid-613baec7d469bb68b3c75343c49d6822ee4bd1a6 --- .../main/java/alluxio/conf/PropertyKey.java | 20 +++ .../proxy/s3/RateLimitInputStream.java | 68 ++++++++++ .../java/alluxio/proxy/s3/S3ObjectTask.java | 17 ++- .../proxy/s3/S3RestServiceHandler.java | 19 ++- .../java/alluxio/proxy/s3/S3RestUtils.java | 14 +++ .../main/java/alluxio/web/ProxyWebServer.java | 12 ++ .../proxy/s3/RateLimitInputStreamTest.java | 117 ++++++++++++++++++ 7 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java create mode 100644 core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 801e2df43e9f..df489577043f 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5328,6 +5328,22 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = + intBuilder(Name.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setDescription("Limit the maximum read speed for each connection. " + + "Set value less than or equal to 0 to disable rate limits.") + .setDefaultValue(0) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB = + intBuilder(Name.PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB) + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setDescription("Limit the maximum read speed for all connections. 
" + + "Set value less than or equal to 0 to disable rate limits.") + .setDefaultValue(0) + .setScope(Scope.SERVER) + .build(); // // Locality related properties @@ -8447,6 +8463,10 @@ public static final class Name { public static final String PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = "alluxio.proxy.s3.v2.async.processing.enabled"; public static final String S3_UPLOADS_ID_XATTR_KEY = "s3_uploads_mulitpartupload_id"; + public static final String PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB = + "alluxio.proxy.s3.global.read.rate.limit.mb"; + public static final String PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = + "alluxio.proxy.s3.single.connection.read.rate.limit.mb"; // // Locality related properties diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java new file mode 100644 index 000000000000..7fa6f770e447 --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/RateLimitInputStream.java @@ -0,0 +1,68 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy.s3; + +import com.google.common.util.concurrent.RateLimiter; + +import java.io.IOException; +import java.io.InputStream; + +/** + * This class is a wrapper for InputStream which limit rate when reading bytes. + */ +public class RateLimitInputStream extends InputStream { + + private final InputStream mInputStream; + private final RateLimiter[] mRateLimiters; + + /** + * Constructs a new {@link RateLimitInputStream}. 
+ * + * @param inputStream Original stream to be limited + * @param rateLimiters RateLimiters to limit Maximal reading bytes per second + */ + public RateLimitInputStream(InputStream inputStream, RateLimiter... rateLimiters) { + mInputStream = inputStream; + mRateLimiters = rateLimiters; + } + + @Override + public int read() throws IOException { + acquire(1); + return mInputStream.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + acquire(Math.min(b.length - off, len)); + return mInputStream.read(b, off, len); + } + + @Override + public void close() throws IOException { + mInputStream.close(); + } + + private void acquire(int permits) { + for (RateLimiter rateLimiter : mRateLimiters) { + if (rateLimiter == null) { + continue; + } + rateLimiter.acquire(permits); + } + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 4a0af6e33273..0c3a08e599c1 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -37,6 +37,7 @@ import alluxio.metrics.MetricsSystem; import alluxio.proto.journal.File; import alluxio.util.ThreadUtils; +import alluxio.web.ProxyWebServer; import com.codahale.metrics.Timer; import com.fasterxml.jackson.dataformat.xml.XmlMapper; @@ -44,6 +45,7 @@ import com.google.common.io.BaseEncoding; import com.google.common.io.ByteStreams; import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.IOUtils; @@ -311,7 +313,20 @@ public Response continueTask() { RangeFileInStream ris = RangeFileInStream.Factory.create( is, status.getLength(), s3Range); - 
Response.ResponseBuilder res = Response.ok(ris, MediaType.APPLICATION_OCTET_STREAM_TYPE) + InputStream inputStream; + RateLimiter globalRateLimiter = (RateLimiter) mHandler.getServletContext() + .getAttribute(ProxyWebServer.GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY); + long rate = (long) mHandler.getMetaFS().getConf() + .getInt(PropertyKey.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) * Constants.MB; + RateLimiter currentRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); + if (currentRateLimiter == null && globalRateLimiter == null) { + inputStream = ris; + } else { + inputStream = new RateLimitInputStream(ris, globalRateLimiter, currentRateLimiter); + } + + Response.ResponseBuilder res = Response.ok(inputStream, + MediaType.APPLICATION_OCTET_STREAM_TYPE) .lastModified(new Date(status.getLastModificationTimeMs())) .header(S3Constants.S3_CONTENT_LENGTH_HEADER, s3Range.getLength(status.getLength())); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index d1f4007fed03..94627fac01be 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -46,6 +46,7 @@ import com.google.common.io.ByteStreams; import com.google.common.net.InetAddresses; import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.IOUtils; @@ -126,6 +127,8 @@ public final class S3RestServiceHandler { private final Pattern mBucketInvalidSuffixPattern; private final Pattern mBucketValidNamePattern; + private final RateLimiter mGlobalRateLimiter; + /** * Constructs a new {@link S3RestServiceHandler}. 
* @@ -168,6 +171,9 @@ public S3RestServiceHandler(@Context ServletContext context) .build() ); } + + mGlobalRateLimiter = (RateLimiter) context.getAttribute( + ProxyWebServer.GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY); } /** @@ -1236,7 +1242,18 @@ private Response getObject(final String bucket, S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); RangeFileInStream ris = RangeFileInStream.Factory.create(is, status.getLength(), s3Range); - Response.ResponseBuilder res = Response.ok(ris) + InputStream inputStream; + long rate = + (long) mSConf.getInt(PropertyKey.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) + * Constants.MB; + RateLimiter currentRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); + if (currentRateLimiter == null && mGlobalRateLimiter == null) { + inputStream = ris; + } else { + inputStream = new RateLimitInputStream(ris, mGlobalRateLimiter, currentRateLimiter); + } + + Response.ResponseBuilder res = Response.ok(inputStream) .lastModified(new Date(status.getLastModificationTimeMs())) .header(S3Constants.S3_CONTENT_LENGTH_HEADER, s3Range.getLength(status.getLength())); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index 9ed612f7e331..a7963319c820 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -40,6 +40,7 @@ import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Longs; +import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -56,6 +57,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.TreeMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ 
-689,6 +691,18 @@ public static void populateTaggingInXAttr(Map xattrMap, Stri } } + /** + * Create a rate limiter for given rate. + * @param rate bytes per second + * @return empty if rate <= 0 + */ + public static Optional createRateLimiter(long rate) { + if (rate <= 0) { + return Optional.empty(); + } + return Optional.of(RateLimiter.create(rate)); + } + /** * Comparator based on uri name, treat uri name as a Long number. */ diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index d1b90b83a3ae..7aac4bca16ac 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -25,10 +25,12 @@ import alluxio.proxy.s3.S3Handler; import alluxio.proxy.s3.S3RequestServlet; import alluxio.proxy.s3.S3RestExceptionMapper; +import alluxio.proxy.s3.S3RestUtils; import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; import com.google.common.base.Stopwatch; +import com.google.common.util.concurrent.RateLimiter; import org.eclipse.jetty.server.HttpChannel; import org.eclipse.jetty.server.Request; import org.eclipse.jetty.servlet.ServletHolder; @@ -64,6 +66,9 @@ public final class ProxyWebServer extends WebServer { public static final String SERVER_CONFIGURATION_RESOURCE_KEY = "Server Configuration"; public static final String ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY = "Alluxio Proxy Audit Log Writer"; + public static final String GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY = "Global Rate Limiter"; + + private final RateLimiter mGlobalRateLimiter; private final FileSystem mFileSystem; private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; public static final String PROXY_S3_HANDLER_MAP = "Proxy S3 Handler Map"; @@ -102,6 +107,9 @@ public ProxyWebServer(String serviceName, InetSocketAddress address, .register(S3RestExceptionMapper.class); mFileSystem = 
FileSystem.Factory.create(Configuration.global()); + long rate = + (long) Configuration.getInt(PropertyKey.PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB) * Constants.MB; + mGlobalRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("PROXY_AUDIT_LOG"); @@ -124,6 +132,10 @@ public void init() throws ServletException { getServletContext().setAttribute(STREAM_CACHE_SERVLET_RESOURCE_KEY, new StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS))); getServletContext().setAttribute(ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY, mAsyncAuditLogWriter); + if (mGlobalRateLimiter != null) { + getServletContext().setAttribute(GLOBAL_RATE_LIMITER_SERVLET_RESOURCE_KEY, + mGlobalRateLimiter); + } } @Override diff --git a/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java b/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java new file mode 100644 index 000000000000..93353f167f30 --- /dev/null +++ b/core/server/proxy/src/test/java/alluxio/proxy/s3/RateLimitInputStreamTest.java @@ -0,0 +1,117 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.proxy.s3; + +import static alluxio.Constants.KB; +import static alluxio.Constants.MB; + +import com.google.common.util.concurrent.RateLimiter; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.FutureTask; +import java.util.stream.Collectors; + +public class RateLimitInputStreamTest { + + private byte[] mData; + + @Before + public void init() throws IOException { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + int count = 0; + while (count < MB) { + byte[] bytes = UUID.randomUUID().toString().getBytes(); + byteArrayOutputStream.write(bytes); + count += bytes.length; + } + mData = Arrays.copyOf(byteArrayOutputStream.toByteArray(), MB); + } + + @Test + public void testSingleThreadRead() throws IOException { + Random random = new Random(); + for (int i = 1; i <= 5; i++) { + long rate1 = (random.nextInt(4) + 1) * 100 * KB; + long rate2 = (random.nextInt(4) + 1) * 100 * KB; + ByteArrayInputStream inputStream = new ByteArrayInputStream(mData); + RateLimitInputStream rateLimitInputStream = new RateLimitInputStream(inputStream, + RateLimiter.create(rate1), RateLimiter.create(rate2)); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + long start = System.currentTimeMillis(); + IOUtils.copy(rateLimitInputStream, byteArrayOutputStream, KB); + long end = System.currentTimeMillis(); + long duration = end - start; + long expectedDuration = MB / Math.min(rate1, rate2) * 1000; + Assert.assertTrue(duration >= expectedDuration && duration <= expectedDuration + 1000); + 
Assert.assertArrayEquals(mData, byteArrayOutputStream.toByteArray()); + } + } + + private void testMultiThreadRead(long globalRate, long rate, int threadNum) { + long totalSize = (long) threadNum * mData.length; + RateLimiter globalRateLimiter = RateLimiter.create(globalRate); + ExecutorService threadPool = Executors.newFixedThreadPool(threadNum); + List> tasks = new ArrayList<>(); + for (int i = 1; i <= threadNum; i++) { + tasks.add(new FutureTask<>(() -> { + ByteArrayInputStream inputStream = new ByteArrayInputStream(mData); + RateLimitInputStream rateLimitInputStream = new RateLimitInputStream(inputStream, + RateLimiter.create(rate), globalRateLimiter); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(MB); + IOUtils.copy(rateLimitInputStream, byteArrayOutputStream, KB); + return byteArrayOutputStream.toByteArray(); + })); + } + long start = System.currentTimeMillis(); + tasks.forEach(threadPool::submit); + List results; + try { + results = tasks.stream().map(task -> { + try { + return task.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + } finally { + threadPool.shutdownNow(); + } + long end = System.currentTimeMillis(); + long duration = end - start; + long expectedDuration = totalSize / Math.min(globalRate, (long) threadNum * rate) * 1000; + Assert.assertTrue(duration >= expectedDuration && duration <= expectedDuration + 1000); + results.forEach(bytes -> Assert.assertArrayEquals(mData, bytes)); + } + + @Test + public void testMultiThreadReadWithBiggerGlobalRate() { + testMultiThreadRead(400 * KB, 100 * KB, 3); + } + + @Test + public void testMultiThreadReadWithSmallerGlobalRate() { + testMultiThreadRead(100 * KB, 200 * KB, 3); + } +} From ff0a6da12b8342de6ed5235a3785acecc8fafc1e Mon Sep 17 00:00:00 2001 From: yiichan Date: Wed, 22 Feb 2023 16:20:43 +0800 Subject: [PATCH 133/334] Fix free worker command bugs ### What changes are proposed in this pull request? 
Fix potential bugs in freeWorker command. ### Why are the changes needed? When a worker has been decommissioned, its metadata can not be got by calling `getWorkerInfolist()`. This method accesses `LoadingCache> mWorkerInfoCache` in `DefaultBlockMaster.java`, which will not refresh instantly. As to method `removeDecommissionedWorker()` in `BlockMasterClientServiceHandler.java`, if we don't add FieldRanges, the list `decommissionedWorkers` would not get enough information to run the loop below successfully, though the worker has been decommissioned. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16458 change-id: cid-101865dc9ec4f40e7561f81f38287c6efc2ae23f --- .../master/block/BlockMasterClientServiceHandler.java | 6 +++++- .../java/alluxio/cli/fs/command/FreeWorkerCommand.java | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java index deb5c66f5125..f6bd7c3aaa7d 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java @@ -34,6 +34,7 @@ import alluxio.grpc.GrpcUtils; import alluxio.grpc.RemoveDecommissionedWorkerPOptions; import alluxio.grpc.RemoveDecommissionedWorkerPResponse; +import alluxio.grpc.WorkerInfoField; import alluxio.grpc.WorkerRange; import alluxio.wire.WorkerInfo; @@ -150,7 +151,10 @@ public void removeDecommissionedWorker(RemoveDecommissionedWorkerPOptions option RpcUtils.call(LOG, () -> { List decommissionedWorkers = mBlockMaster.getWorkerReport( new GetWorkerReportOptions(GetWorkerReportPOptions.newBuilder() - .setWorkerRange(WorkerRange.DECOMMISSIONED).build())); + .setWorkerRange(WorkerRange.DECOMMISSIONED) + .addFieldRanges(WorkerInfoField.ADDRESS) + 
.addFieldRanges(WorkerInfoField.ID) + .build())); for (WorkerInfo worker : decommissionedWorkers) { if (worker.getAddress().getHost().equals(options.getWorkerName())) { mBlockMaster.removeDecommissionedWorker(worker.getId()); diff --git a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java index a1502e898dec..0b731bf98865 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java @@ -15,6 +15,7 @@ import alluxio.annotation.PublicApi; import alluxio.client.block.BlockMasterClient; +import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.client.block.stream.BlockWorkerClient; import alluxio.client.file.FileSystemContext; import alluxio.exception.AlluxioException; @@ -57,7 +58,11 @@ public int run(CommandLine cl) throws AlluxioException, IOException { try (CloseableResource masterClientResource = mFsContext.acquireBlockMasterClientResource()) { - totalWorkers = masterClientResource.get().getWorkerInfoList().stream() + totalWorkers = masterClientResource.get() + // the default option is to get all worker infos, + // as we want to make sure the worker by the name exists and is not a typo + .getWorkerReport(GetWorkerReportOptions.defaults()) + .stream() .map(WorkerInfo::getAddress) .collect(toList()); } From c4a700720f54107f2d222594880bad2bdd47b54d Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Thu, 23 Feb 2023 06:23:51 +0800 Subject: [PATCH 134/334] Fix head and tail commands read less than expected ### What changes are proposed in this pull request? Fix a bug where `fs head` and `fs tail` output less data than it is expected to. ### Why are the changes needed? The code is buggy: it only calls `read` once, and does not check if the returned number of bytes read is equal to the total number of bytes to read as specified by the cli option. 
Compare with the `cat` command: https://github.com/Alluxio/alluxio/blob/73f3ce83c8a3ef77ac3eebb4579bb7d412784ec9/shell/src/main/java/alluxio/cli/fs/command/CatCommand.java#L57-L63 ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16928 change-id: cid-86b76a3444fa9efe2cd63a4b42a42e4f62b8f21b --- .../java/alluxio/cli/fs/command/HeadCommand.java | 15 +++------------ .../java/alluxio/cli/fs/command/TailCommand.java | 16 ++++------------ 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/shell/src/main/java/alluxio/cli/fs/command/HeadCommand.java b/shell/src/main/java/alluxio/cli/fs/command/HeadCommand.java index 090b79e474a7..f96a5d490fb4 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/HeadCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/HeadCommand.java @@ -24,6 +24,7 @@ import alluxio.util.FormatUtils; import com.google.common.base.Preconditions; +import com.google.common.io.ByteStreams; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; @@ -71,18 +72,8 @@ protected void runPlainPath(AlluxioURI plainPath, CommandLine cl) throw new IOException(ExceptionMessage.PATH_MUST_BE_FILE.getMessage(plainPath)); } try (FileInStream is = mFileSystem.openFile(plainPath)) { - long bytesToRead; - if (status.getLength() > mNumOfBytes) { - bytesToRead = mNumOfBytes; - } else { - bytesToRead = status.getLength(); - } - - byte[] buf = new byte[(int) bytesToRead]; - int read = is.read(buf); - if (read != -1) { - System.out.write(buf, 0, read); - } + final long bytesToRead = Math.min(status.getLength(), mNumOfBytes); + ByteStreams.copy(ByteStreams.limit(is, bytesToRead), System.out); } } diff --git a/shell/src/main/java/alluxio/cli/fs/command/TailCommand.java b/shell/src/main/java/alluxio/cli/fs/command/TailCommand.java index 8e7f60165056..c7091f54820e 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/TailCommand.java +++ 
b/shell/src/main/java/alluxio/cli/fs/command/TailCommand.java @@ -24,6 +24,7 @@ import alluxio.util.FormatUtils; import com.google.common.base.Preconditions; +import com.google.common.io.ByteStreams; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; @@ -69,18 +70,9 @@ protected void runPlainPath(AlluxioURI path, CommandLine cl) throw new IOException(ExceptionMessage.PATH_MUST_BE_FILE.getMessage(path)); } try (FileInStream is = mFileSystem.openFile(path)) { - byte[] buf = new byte[numOfBytes]; - long bytesToRead; - if (status.getLength() > numOfBytes) { - bytesToRead = numOfBytes; - } else { - bytesToRead = status.getLength(); - } - is.skip(status.getLength() - bytesToRead); - int read = is.read(buf); - if (read != -1) { - System.out.write(buf, 0, read); - } + final long bytesToRead = Math.min(status.getLength(), numOfBytes); + ByteStreams.skipFully(is, status.getLength() - bytesToRead); + ByteStreams.copy(ByteStreams.limit(is, bytesToRead), System.out); } } From 903269f077f30d3c274adfbc53ef248be8ff7356 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Thu, 23 Feb 2023 09:00:30 +0800 Subject: [PATCH 135/334] Implement unbuffer interface for HdfsFileInputStream ### What changes are proposed in this pull request? Implement unbuffer interface for HdfsFileInputStream. Fix #16016. ### Why are the changes needed? If the unbuffer method is not implemented, then impala will not be able to use the file handle cache. ### Does this PR introduce any user facing changes? Implement CanUnbuffer and StreamCapabilities for HdfsFileInputStream. 
pr-link: Alluxio/alluxio#16017 change-id: cid-b50163c7b4f199b8a61d5818a0e4739039f2745c --- .../main/java/alluxio/client/CanUnbuffer.java | 23 ++ .../client/block/stream/BlockInStream.java | 17 +- .../client/file/AlluxioFileInStream.java | 13 ++ .../alluxio/client/file/FileInStream.java | 7 +- .../file/cache/LocalCacheFileInStream.java | 7 + .../block/stream/TestBlockInStream.java | 13 ++ .../client/block/stream/TestDataReader.java | 3 + .../client/file/AlluxioFileInStreamTest.java | 104 +++++++++ .../cache/LocalCacheFileInStreamTest.java | 32 +++ .../hadoop/AlluxioHdfsInputStream.java | 5 + .../hadoop/BaseHdfsFileInputStream.java | 206 ++++++++++++++++++ .../alluxio/hadoop/HdfsFileInputStream.java | 169 +------------- .../alluxio/hadoop/HdfsFileInputStream.java | 66 ++++++ 13 files changed, 497 insertions(+), 168 deletions(-) create mode 100644 core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java create mode 100644 core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java create mode 100644 core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java diff --git a/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java b/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java new file mode 100644 index 000000000000..c9da5ecda937 --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/CanUnbuffer.java @@ -0,0 +1,23 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client; + +/** + * indicate that InputStream can clear their buffers on request. + */ +public interface CanUnbuffer { + /** + * Reduce the buffering. This will also free sockets and file descriptors held by the stream, + * if possible. + */ + void unbuffer(); +} diff --git a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java index 042662935f28..e6652444987b 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/block/stream/BlockInStream.java @@ -13,6 +13,7 @@ import alluxio.Seekable; import alluxio.client.BoundedStream; +import alluxio.client.CanUnbuffer; import alluxio.client.PositionedReadable; import alluxio.client.ReadType; import alluxio.client.file.FileSystemContext; @@ -25,6 +26,7 @@ import alluxio.grpc.ReadRequest; import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.proto.dataserver.Protocol; +import alluxio.util.LogUtils; import alluxio.util.io.BufferUtils; import alluxio.util.network.NettyUtils; import alluxio.util.network.NetworkAddressUtils; @@ -48,7 +50,7 @@ */ @NotThreadSafe public class BlockInStream extends InputStream implements BoundedStream, Seekable, - PositionedReadable { + PositionedReadable, CanUnbuffer { private static final Logger LOG = LoggerFactory.getLogger(BlockInStream.class); /** the source tracking where the block is from. */ @@ -70,9 +72,9 @@ public enum BlockInStreamSource { /** Current position of the stream, relative to the start of the block. */ private long mPos = 0; /** The current data chunk. 
*/ - private DataBuffer mCurrentChunk; + protected DataBuffer mCurrentChunk; - private DataReader mDataReader; + protected DataReader mDataReader; private final DataReader.Factory mDataReaderFactory; private boolean mClosed = false; @@ -510,6 +512,15 @@ private void closeDataReader() throws IOException { mDataReader = null; } + @Override + public void unbuffer() { + try { + closeDataReader(); + } catch (IOException e) { + LogUtils.warnWithException(LOG, "failed to unbuffer the block stream", e); + } + } + /** * Convenience method to ensure the stream is not closed. */ diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java index 138cbe4a745d..afa5a452d1b8 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java @@ -421,6 +421,9 @@ private void closeBlockInStream(BlockInStream stream) throws IOException { if (stream == mBlockInStream) { // if stream is instance variable, set to null mBlockInStream = null; } + if (stream == mCachedPositionedReadStream) { + mCachedPositionedReadStream = null; + } if (blockSource == BlockInStream.BlockInStreamSource.NODE_LOCAL || blockSource == BlockInStream.BlockInStreamSource.PROCESS_LOCAL) { return; @@ -519,4 +522,14 @@ private void handleRetryableException(BlockInStream stream, IOException e) { mFailedWorkers.put(workerAddress, System.currentTimeMillis()); } } + + @Override + public void unbuffer() { + if (mBlockInStream != null) { + mBlockInStream.unbuffer(); + } + if (mCachedPositionedReadStream != null) { + mCachedPositionedReadStream.unbuffer(); + } + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java index 473ca4e9bf4d..cc61f7fb2be0 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java 
+++ b/core/client/fs/src/main/java/alluxio/client/file/FileInStream.java @@ -13,6 +13,7 @@ import alluxio.Seekable; import alluxio.client.BoundedStream; +import alluxio.client.CanUnbuffer; import alluxio.client.PositionedReadable; import alluxio.exception.PreconditionMessage; import alluxio.util.io.BufferUtils; @@ -29,7 +30,7 @@ * into a given offset of the stream to read. */ public abstract class FileInStream extends InputStream - implements BoundedStream, PositionedReadable, Seekable { + implements BoundedStream, PositionedReadable, Seekable, CanUnbuffer { private final byte[] mSingleByte = new byte[1]; @Override @@ -101,4 +102,8 @@ public int read(ByteBuffer byteBuffer, int off, int len) throws IOException { } return nread; } + + @Override + public void unbuffer() { + } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java index f8784a74b43d..fa78f002c55e 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java @@ -309,6 +309,13 @@ public void seek(long pos) { mPosition = pos; } + @Override + public void unbuffer() { + if (mExternalFileInStream != null) { + mExternalFileInStream.unbuffer(); + } + } + /** * Convenience method to ensure the stream is not closed. 
*/ diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java index 842f532ffca2..fd6f756525cc 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestBlockInStream.java @@ -11,8 +11,11 @@ package alluxio.client.block.stream; +import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.wire.WorkerNetAddress; +import com.google.common.annotations.VisibleForTesting; + import java.io.IOException; import java.nio.ByteBuffer; @@ -56,6 +59,16 @@ public boolean isClosed() { return mClosed; } + @VisibleForTesting + public DataReader getDataReader() { + return mDataReader; + } + + @VisibleForTesting + public DataBuffer getCurrentChunk() { + return mCurrentChunk; + } + @Override public void close() throws IOException { mClosed = true; diff --git a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java index b7dc2cae0b35..ad36d2a6c4ac 100644 --- a/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java +++ b/core/client/fs/src/test/java/alluxio/client/block/stream/TestDataReader.java @@ -14,6 +14,8 @@ import alluxio.network.protocol.databuffer.DataBuffer; import alluxio.network.protocol.databuffer.NioDataBuffer; +import com.google.common.base.Preconditions; + import java.io.IOException; import java.nio.ByteBuffer; import javax.annotation.Nullable; @@ -38,6 +40,7 @@ public TestDataReader(byte[] data, long chunkSize, long offset, long length) { @Override @Nullable public DataBuffer readChunk() { + Preconditions.checkState(!mClosed, "reader is closed"); if (mPos >= mEnd || mPos >= mData.length) { return null; } diff --git a/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java 
b/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java index b77f28d10ed6..04271a1d8d16 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/AlluxioFileInStreamTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.junit.Assume.assumeTrue; @@ -826,6 +827,109 @@ public void triggerAsyncOnClose() throws Exception { assertTrue(mTestStream.triggerAsyncCaching(mInStreams.get(mInStreams.size() - 1))); } + @Test + public void unbufferAroundRead() throws Exception { + int bufferSize = (int) (mFileSize / 2); + byte[] buffer = new byte[bufferSize]; + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + } + + @Test + public void unbufferAroundPositionRead() throws Exception { + int bufferSize = (int) (mFileSize / 4); + byte[] buffer = new byte[bufferSize]; + mTestStream.positionedRead(bufferSize, buffer, 0, bufferSize); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + mTestStream.positionedRead(bufferSize, buffer, 0, bufferSize); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + unbuffer(); + } + + @Test + public void unbufferAroundSeek() throws Exception { + int bufferSize = (int) (mFileSize / 8); + int seekSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.seek(seekSize); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(seekSize, bufferSize), buffer); + } + + @Test + 
public void unbufferAroundSkip() throws Exception { + int bufferSize = (int) (mFileSize / 8); + int skipSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.read(buffer); + unbuffer(); + mTestStream.skip(skipSize); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(skipSize + bufferSize, bufferSize), + buffer); + } + + @Test + public void unbufferOnClosedFile() throws Exception { + mTestStream.close(); + unbuffer(); + } + + @Test + public void multipleUnbuffers() throws Exception { + byte[] buffer = new byte[(int) (mFileSize / 2)]; + unbuffer(); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray((int) (mFileSize / 2)), buffer); + unbuffer(); + unbuffer(); + } + + @Test + public void unbufferMultipleReads() throws IOException { + int bufferSize = (int) (mFileSize / 8); + byte[] buffer = new byte[bufferSize]; + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals(BufferUtils.getIncreasingByteArray(bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize, bufferSize), buffer); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 2, bufferSize), buffer); + unbuffer(); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 3, bufferSize), buffer); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 4, bufferSize), buffer); + mTestStream.read(buffer); + assertArrayEquals( + BufferUtils.getIncreasingByteArray(bufferSize * 5, bufferSize), buffer); + unbuffer(); + } + + private void unbuffer() { + mTestStream.unbuffer(); + for (TestBlockInStream stream : mInStreams) { + assertNull(stream.getCurrentChunk()); + assertNull(stream.getDataReader()); + } + } + /** * Tests that reading dataRead bytes into a buffer will properly 
write those bytes to the cache * streams and that the correct bytes are read from the {@link FileInStream}. diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index 876e12363282..9119e26384d2 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -524,6 +524,38 @@ protected Stopwatch createUnstartedStopwatch() { Assert.assertEquals(timeSource.get(StepTicker.Type.CACHE_MISS), timeReadExternal); } + @Test + public void testUnbuffer() throws Exception { + int fileSize = mPageSize; + byte[] testData = BufferUtils.getIncreasingByteArray(fileSize); + ByteArrayCacheManager manager = new ByteArrayCacheManager(); + LocalCacheFileInStream stream = setupWithSingleFile(testData, manager); + + int partialReadSize = fileSize / 5; + int offset = fileSize / 5; + + byte[] cacheMiss = new byte[partialReadSize]; + stream.unbuffer(); + stream.seek(offset); + stream.unbuffer(); + Assert.assertEquals(partialReadSize, stream.read(cacheMiss)); + stream.unbuffer(); + Assert.assertArrayEquals( + Arrays.copyOfRange(testData, offset, offset + partialReadSize), cacheMiss); + Assert.assertEquals(0, manager.mPagesServed); + Assert.assertEquals(1, manager.mPagesCached); + + byte[] cacheHit = new byte[partialReadSize]; + stream.unbuffer(); + stream.seek(offset); + stream.unbuffer(); + Assert.assertEquals(partialReadSize, stream.read(cacheHit)); + stream.unbuffer(); + Assert.assertArrayEquals( + Arrays.copyOfRange(testData, offset, offset + partialReadSize), cacheHit); + Assert.assertEquals(1, manager.mPagesServed); + } + private LocalCacheFileInStream setupWithSingleFile(byte[] data, CacheManager manager) throws Exception { Map files = new HashMap<>(); diff --git 
a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java index 581ae7e8de95..4671e812b21c 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java @@ -104,4 +104,9 @@ public int positionedRead(long position, byte[] buffer, int offset, int length) throws IOException { return mInput.read(position, buffer, offset, length); } + + @Override + public void unbuffer() { + mInput.unbuffer(); + } } diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java new file mode 100644 index 000000000000..bfe1c1278c96 --- /dev/null +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/BaseHdfsFileInputStream.java @@ -0,0 +1,206 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.hadoop; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; +import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileDoesNotExistException; + +import org.apache.hadoop.fs.ByteBufferReadable; +import org.apache.hadoop.fs.FileSystem.Statistics; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.EOFException; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * An input stream for reading a file from HDFS. This is just a wrapper around + * {@link FileInStream} with additional statistics gathering in a {@link Statistics} object. + */ +@NotThreadSafe +public class BaseHdfsFileInputStream extends InputStream implements Seekable, PositionedReadable, + ByteBufferReadable { + private static final Logger LOG = LoggerFactory.getLogger(BaseHdfsFileInputStream.class); + + private final Statistics mStatistics; + protected final FileInStream mInputStream; + + private boolean mClosed = false; + + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param fs the file system + * @param uri the Alluxio file URI + * @param stats filesystem statistics + */ + public BaseHdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) + throws IOException { + LOG.debug("HdfsFileInputStream({}, {})", uri, stats); + + mStatistics = stats; + try { + mInputStream = fs.openFile(uri); + } catch (FileDoesNotExistException e) { + // Transform the Alluxio exception to a Java exception to satisfy the HDFS API contract. 
+ throw new FileNotFoundException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri)); + } catch (AlluxioException e) { + throw new IOException(e); + } + } + + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param inputStream the input stream + * @param stats filesystem statistics + */ + public BaseHdfsFileInputStream(FileInStream inputStream, Statistics stats) { + mInputStream = inputStream; + mStatistics = stats; + } + + @Override + public int available() throws IOException { + if (mClosed) { + throw new IOException("Cannot query available bytes from a closed stream."); + } + return (int) mInputStream.remaining(); + } + + @Override + public void close() throws IOException { + if (mClosed) { + return; + } + mInputStream.close(); + mClosed = true; + } + + @Override + public long getPos() throws IOException { + return mInputStream.getPos(); + } + + @Override + public int read() throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int data = mInputStream.read(); + if (data != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(1); + } + return data; + } + + @Override + public int read(byte[] buffer) throws IOException { + return read(buffer, 0, buffer.length); + } + + @Override + public int read(byte[] buffer, int offset, int length) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int bytesRead = mInputStream.read(buffer, offset, length); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + public int read(ByteBuffer buf) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + int bytesRead = mInputStream.read(buf); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + 
public int read(long position, byte[] buffer, int offset, int length) throws IOException { + if (mClosed) { + throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); + } + + int bytesRead = mInputStream.positionedRead(position, buffer, offset, length); + if (bytesRead != -1 && mStatistics != null) { + mStatistics.incrementBytesRead(bytesRead); + } + return bytesRead; + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + readFully(position, buffer, 0, buffer.length); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { + int totalBytesRead = 0; + while (totalBytesRead < length) { + int bytesRead = + read(position + totalBytesRead, buffer, offset + totalBytesRead, length - totalBytesRead); + if (bytesRead == -1) { + throw new EOFException(); + } + totalBytesRead += bytesRead; + } + } + + @Override + public void seek(long pos) throws IOException { + try { + mInputStream.seek(pos); + } catch (IllegalArgumentException e) { // convert back to IOException + throw new IOException(e); + } + } + + /** + * This method is not supported in {@link BaseHdfsFileInputStream}. 
+ * + * @param targetPos N/A + * @return N/A + * @throws IOException always + */ + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + throw new IOException("This method is not supported."); + } + + @Override + public long skip(long n) throws IOException { + if (mClosed) { + throw new IOException("Cannot skip bytes in a closed stream."); + } + return mInputStream.skip(n); + } +} diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java index 8ac3f7ab3285..a00901202458 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/HdfsFileInputStream.java @@ -14,37 +14,15 @@ import alluxio.AlluxioURI; import alluxio.client.file.FileInStream; import alluxio.client.file.FileSystem; -import alluxio.exception.AlluxioException; -import alluxio.exception.ExceptionMessage; -import alluxio.exception.FileDoesNotExistException; -import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FileSystem.Statistics; -import org.apache.hadoop.fs.PositionedReadable; -import org.apache.hadoop.fs.Seekable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.EOFException; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import javax.annotation.concurrent.NotThreadSafe; /** - * An input stream for reading a file from HDFS. This is just a wrapper around - * {@link FileInStream} with additional statistics gathering in a {@link Statistics} object. + * HdfsFileInputStream implement for hadoop 1 and hadoop 2. 
*/ -@NotThreadSafe -public class HdfsFileInputStream extends InputStream implements Seekable, PositionedReadable, - ByteBufferReadable { - private static final Logger LOG = LoggerFactory.getLogger(HdfsFileInputStream.class); - - private final Statistics mStatistics; - private final FileInStream mInputStream; - - private boolean mClosed = false; +public class HdfsFileInputStream extends BaseHdfsFileInputStream { /** * Constructs a new stream for reading a file from HDFS. @@ -53,19 +31,8 @@ public class HdfsFileInputStream extends InputStream implements Seekable, Positi * @param uri the Alluxio file URI * @param stats filesystem statistics */ - public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) - throws IOException { - LOG.debug("HdfsFileInputStream({}, {})", uri, stats); - - mStatistics = stats; - try { - mInputStream = fs.openFile(uri); - } catch (FileDoesNotExistException e) { - // Transform the Alluxio exception to a Java exception to satisfy the HDFS API contract. 
- throw new FileNotFoundException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(uri)); - } catch (AlluxioException e) { - throw new IOException(e); - } + public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) throws IOException { + super(fs, uri, stats); } /** @@ -75,132 +42,6 @@ public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) * @param stats filesystem statistics */ public HdfsFileInputStream(FileInStream inputStream, Statistics stats) { - mInputStream = inputStream; - mStatistics = stats; - } - - @Override - public int available() throws IOException { - if (mClosed) { - throw new IOException("Cannot query available bytes from a closed stream."); - } - return (int) mInputStream.remaining(); - } - - @Override - public void close() throws IOException { - if (mClosed) { - return; - } - mInputStream.close(); - mClosed = true; - } - - @Override - public long getPos() throws IOException { - return mInputStream.getPos(); - } - - @Override - public int read() throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int data = mInputStream.read(); - if (data != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(1); - } - return data; - } - - @Override - public int read(byte[] buffer) throws IOException { - return read(buffer, 0, buffer.length); - } - - @Override - public int read(byte[] buffer, int offset, int length) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int bytesRead = mInputStream.read(buffer, offset, length); - if (bytesRead != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public int read(ByteBuffer buf) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - int bytesRead = mInputStream.read(buf); - if (bytesRead != -1 && 
mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) throws IOException { - if (mClosed) { - throw new IOException(ExceptionMessage.READ_CLOSED_STREAM.getMessage()); - } - - int bytesRead = mInputStream.positionedRead(position, buffer, offset, length); - if (bytesRead != -1 && mStatistics != null) { - mStatistics.incrementBytesRead(bytesRead); - } - return bytesRead; - } - - @Override - public void readFully(long position, byte[] buffer) throws IOException { - readFully(position, buffer, 0, buffer.length); - } - - @Override - public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { - int totalBytesRead = 0; - while (totalBytesRead < length) { - int bytesRead = - read(position + totalBytesRead, buffer, offset + totalBytesRead, length - totalBytesRead); - if (bytesRead == -1) { - throw new EOFException(); - } - totalBytesRead += bytesRead; - } - } - - @Override - public void seek(long pos) throws IOException { - try { - mInputStream.seek(pos); - } catch (IllegalArgumentException e) { // convert back to IOException - throw new IOException(e); - } - } - - /** - * This method is not supported in {@link HdfsFileInputStream}. 
- * - * @param targetPos N/A - * @return N/A - * @throws IOException always - */ - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - throw new IOException("This method is not supported."); - } - - @Override - public long skip(long n) throws IOException { - if (mClosed) { - throw new IOException("Cannot skip bytes in a closed stream."); - } - return mInputStream.skip(n); + super(inputStream, stats); } } diff --git a/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java b/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java new file mode 100644 index 000000000000..9c1f6511b2db --- /dev/null +++ b/core/client/hdfs3/src/main/java/alluxio/hadoop/HdfsFileInputStream.java @@ -0,0 +1,66 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.hadoop; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.CanUnbuffer; +import org.apache.hadoop.fs.FileSystem.Statistics; +import org.apache.hadoop.fs.StreamCapabilities; + +import java.io.IOException; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * HdfsFileInputStream implement for hadoop 3. + * This is just a wrapper around {@link HdfsFileInputStream} with + * CanUnbuffer and StreamCapabilities support. 
+ */ +@NotThreadSafe +public class HdfsFileInputStream extends BaseHdfsFileInputStream + implements CanUnbuffer, StreamCapabilities { + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param fs the file system + * @param uri the Alluxio file URI + * @param stats filesystem statistics + */ + public HdfsFileInputStream(FileSystem fs, AlluxioURI uri, Statistics stats) + throws IOException { + super(fs, uri, stats); + } + + /** + * Constructs a new stream for reading a file from HDFS. + * + * @param inputStream the input stream + * @param stats filesystem statistics + */ + public HdfsFileInputStream(FileInStream inputStream, Statistics stats) { + super(inputStream, stats); + } + + @Override + public boolean hasCapability(String capability) { + return StringUtils.equalsIgnoreCase("in:unbuffer", capability) + || StringUtils.equalsIgnoreCase("in:readbytebuffer", capability); + } + + @Override + public void unbuffer() { + mInputStream.unbuffer(); + } +} From a5d57e9f7888fb78c65e45540f2f8ec22da149cc Mon Sep 17 00:00:00 2001 From: Yaolong Liu Date: Thu, 23 Feb 2023 14:59:11 +0800 Subject: [PATCH 136/334] Make standby master keep MountTable up-to-date ### What changes are proposed in this pull request? Keep the MountTable on the standby master up-to-date before switching to primary. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#16908 change-id: cid-5e239e4ec8ffde12af4c79828e031f7c0c307cd7 --- .../java/alluxio/master/file/meta/MountTable.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java index fb4397ad3df3..563a282b9f37 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java @@ -12,6 +12,7 @@ package alluxio.master.file.meta; import alluxio.AlluxioURI; +import alluxio.conf.Configuration; import alluxio.exception.AccessControlException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; @@ -38,6 +39,7 @@ import alluxio.resource.LockResource; import alluxio.underfs.UfsManager; import alluxio.underfs.UnderFileSystem; +import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.IdUtils; import alluxio.util.io.PathUtils; @@ -95,7 +97,7 @@ public MountTable(UfsManager ufsManager, MountInfo rootMountInfo, Clock clock) { mReadLock = lock.readLock(); mWriteLock = lock.writeLock(); mUfsManager = ufsManager; - mState = new State(rootMountInfo, clock); + mState = new State(rootMountInfo, clock, mUfsManager); } /** @@ -690,15 +692,17 @@ public final class State implements Journaled { private final Map mMountTable; /** Map from mount id to cache of paths which have been synced with UFS. 
*/ private final UfsSyncPathCache mUfsSyncPathCache; + private final UfsManager mUfsManager; /** * @param mountInfo root mount info * @param clock the clock used for computing sync times */ - State(MountInfo mountInfo, Clock clock) { + State(MountInfo mountInfo, Clock clock, UfsManager ufsManager) { mMountTable = new HashMap<>(10); mMountTable.put(MountTable.ROOT, mountInfo); mUfsSyncPathCache = new UfsSyncPathCache(clock); + mUfsManager = ufsManager; } /** @@ -727,13 +731,19 @@ public void applyAndJournal(Supplier context, DeleteMountPointEn private void applyAddMountPoint(AddMountPointEntry entry) { try (LockResource r = new LockResource(mWriteLock)) { MountInfo mountInfo = fromAddMountPointEntry(entry); + UnderFileSystemConfiguration ufsConf = new UnderFileSystemConfiguration( + Configuration.global(), mountInfo.getOptions().getReadOnly()) + .createMountSpecificConf(mountInfo.getOptions().getPropertiesMap()); mMountTable.put(entry.getAlluxioPath(), mountInfo); + mUfsManager.addMount(mountInfo.getMountId(), mountInfo.getUfsUri(), ufsConf); } } private void applyDeleteMountPoint(DeleteMountPointEntry entry) { try (LockResource r = new LockResource(mWriteLock)) { + long mountId = mMountTable.get(entry.getAlluxioPath()).getMountId(); mMountTable.remove(entry.getAlluxioPath()); + mUfsManager.removeMount(mountId); } } From c148612007e5ba358f3fea7cddc24760dc98b2bf Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Thu, 23 Feb 2023 16:51:41 +0800 Subject: [PATCH 137/334] Add CapacityBasedDeterministicHashPolicy ### What changes are proposed in this pull request? Add a new block location policy `CapacityBaseDeterministicHashPolicy`. ### Why are the changes needed? We want a `CapacityBaseRandomPolicy` that is deterministic. See also #16187. ### Does this PR introduce any user facing changes? 
Yes, a new block location policy is available for config item `alluxio.user.ufs.block.read.location.policy` and `alluxio.user.block.write.location.policy.class`. pr-link: Alluxio/alluxio#16237 change-id: cid-47ba9b1d197b5ad546ac1a993590d49e963c3811 --- .../CapacityBasedDeterministicHashPolicy.java | 136 ++++++++ ...acityBasedDeterministicHashPolicyTest.java | 316 ++++++++++++++++++ .../main/java/alluxio/conf/PropertyKey.java | 7 +- docs/en/api/Java-API.md | 40 ++- 4 files changed, 495 insertions(+), 4 deletions(-) create mode 100644 core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java create mode 100644 core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java b/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java new file mode 100644 index 000000000000..936ed11770bb --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.java @@ -0,0 +1,136 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.block.policy; + +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.block.policy.options.GetWorkerOptions; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Streams; +import org.apache.commons.codec.digest.MurmurHash3; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicLong; + +/** + * A policy that pseudo-randomly distributes blocks between workers according to their capacity, + * so that the probability a worker is chosen is equal to the ratio of its capacity over total + * capacity of all workers, provided that the blocks requested follow a uniform distribution. + * If sharding is disabled, the same block is always assigned to the same worker. If sharding + * is enabled, the block is assigned to a fixed set of workers. + * + * The target worker is determined by the following algorithm: + * 1. build a cumulative distribution function by adding up all workers and their capacities. + * workers are sorted by their host name alphabetically. + * if worker A has 90 GB, B has 10 GB and C has 900 GB, the CDF looks like + * | 0 ... 90 | 90 ... 100 | 100 ... 1000 | + * | worker A | worker B | worker C | + * 2. find a fixed starting point in [0, totalCapacity) determined by the hashed block id. + * | 0 ... 90 | 90 ... 100 | 100 ... 1000 | + * | worker A | worker B | worker C | + * ^ start = 95 + * 3. find the corresponding worker in the CDF. + * which is worker B in this example + * 4. if #shards = 1, this worker is selected. 
otherwise, find a set of candidates: + * 4.1 hashed_block_id(0) = block id + * 4.2 for i in [1, #shards], hashed_block_id(i) = hash(hashed_block_id(i-1)) + * 4.3 find the worker whose position corresponds to hashed_block_id(i) in the CDF, + * and add it to the candidates set + * 4.4 repeat 4.2 - 4.4 + * 5. select a random worker in the candidate set + * + * The difference between this policy and {@link CapacityBaseRandomPolicy} is that this policy + * uses the hashed block ID as the index to choose the target worker, so that the same block is + * always routed to the same set of workers. + * + * Both this policy and {@link DeterministicHashPolicy} choose workers based the hashed block ID. + * The difference is that {@link DeterministicHashPolicy} uniformly distributes the blocks among + * the configured number of shards, while this policy chooses workers based on a distribution of + * their normalized capacity. + * + * @see CapacityBaseRandomPolicy + * @see DeterministicHashPolicy + */ +public class CapacityBasedDeterministicHashPolicy implements BlockLocationPolicy { + private final int mShards; + + /** + * Constructor required by + * {@link BlockLocationPolicy.Factory#create(Class, AlluxioConfiguration)}. 
+ * @param conf Alluxio configuration + */ + public CapacityBasedDeterministicHashPolicy(AlluxioConfiguration conf) { + int numShards = + conf.getInt(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS); + Preconditions.checkArgument(numShards >= 1, "number of shards must be no less than 1"); + mShards = numShards; + } + + @Override + public Optional getWorker(GetWorkerOptions options) { + TreeMap capacityCdf = new TreeMap<>(); + AtomicLong totalCapacity = new AtomicLong(0); + Streams.stream(options.getBlockWorkerInfos()) + .filter(workerInfo -> workerInfo.getCapacityBytes() >= options.getBlockInfo().getLength()) + // sort by hostname to guarantee two workers with the same capacity has a defined order + .sorted(Comparator.comparing(w -> w.getNetAddress().getHost())) + .forEach(workerInfo -> { + capacityCdf.put(totalCapacity.get(), workerInfo); + totalCapacity.getAndAdd(workerInfo.getCapacityBytes()); + }); + if (totalCapacity.get() == 0 || capacityCdf.isEmpty()) { + return Optional.empty(); + } + long blockId = options.getBlockInfo().getBlockId(); + BlockWorkerInfo chosenWorker = pickWorker(capacityCdf, blockId, totalCapacity.get()); + return Optional.of(chosenWorker.getNetAddress()); + } + + private BlockWorkerInfo pickWorker(TreeMap capacityCdf, + long blockId, long totalCapacity) { + if (mShards == 1) { + // if no sharding, simply return the worker corresponding to the start point + long startPoint = Math.abs(hashBlockId(blockId)) % totalCapacity; + return capacityCdf.floorEntry(startPoint).getValue(); + } + long hashedBlockId = blockId; + List candidates = new ArrayList<>(); + for (int i = 1; i <= Math.min(mShards, capacityCdf.size()); i++) { + hashedBlockId = hashBlockId(hashedBlockId); + BlockWorkerInfo candidate = capacityCdf + .floorEntry(Math.abs(hashedBlockId) % totalCapacity) // non-null as capacities >= 0 + .getValue(); + candidates.add(candidate); + } + return getRandomCandidate(candidates); + } + + @VisibleForTesting + protected 
long hashBlockId(long blockId) { + return MurmurHash3.hash64(blockId); + } + + @VisibleForTesting + protected BlockWorkerInfo getRandomCandidate(List candidates) { + int randomIndex = ThreadLocalRandom.current().nextInt(candidates.size()); + return candidates.get(randomIndex); + } +} diff --git a/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java b/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java new file mode 100644 index 000000000000..cb422716a831 --- /dev/null +++ b/core/client/fs/src/test/java/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicyTest.java @@ -0,0 +1,316 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.block.policy; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.block.policy.options.GetWorkerOptions; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.wire.BlockInfo; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +public class CapacityBasedDeterministicHashPolicyTest { + + private static final CapacityBasedDeterministicHashPolicy NO_SHARDING_POLICY; + private static final CapacityBasedDeterministicHashPolicy THREE_SHARDS_POLICY; + private static final AlluxioConfiguration NO_SHARDING_CONF; + private static final AlluxioConfiguration THREE_SHARDS_CONF; + + static { + InstancedConfiguration noShardingConf = Configuration.copyGlobal(); + noShardingConf.set( + PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 1); + NO_SHARDING_CONF = noShardingConf; + InstancedConfiguration threeShardsConf = Configuration.copyGlobal(); + threeShardsConf.set( + PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 3); + THREE_SHARDS_CONF = threeShardsConf; + NO_SHARDING_POLICY = new CapacityBasedDeterministicHashPolicy(NO_SHARDING_CONF); + THREE_SHARDS_POLICY = new CapacityBasedDeterministicHashPolicy(THREE_SHARDS_CONF); + } + + @Test + 
public void basic() { + class TestPolicy extends CapacityBasedDeterministicHashPolicy { + public TestPolicy(AlluxioConfiguration conf) { + super(conf); + } + + @Override + protected long hashBlockId(long blockId) { + return blockId; + } + + @Override + protected BlockWorkerInfo getRandomCandidate(List candidates) { + // always pick the last candidate + Preconditions.checkArgument(candidates.size() >= 1); + return candidates.get(candidates.size() - 1); + } + } + + TestPolicy policy = new TestPolicy(NO_SHARDING_CONF); + + // total capacity: 100 + List blockWorkerInfos = ImmutableList.of( + new BlockWorkerInfo(new WorkerNetAddress().setHost("0"), 10, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("1"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("2"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("3"), 0, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("4"), 50, 0) + ); + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockWorkerInfos(blockWorkerInfos) + .setBlockInfo(blockInfo); + + blockInfo.setBlockId(1); + assertEquals("0", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(5); + assertEquals("0", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(10); + assertEquals("1", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(30); + assertEquals("2", policy.getWorker(options).get().getHost()); + blockInfo.setBlockId(50); + assertEquals("4", policy.getWorker(options).get().getHost()); + } + + @Test + public void sharding() { + class TestPolicy extends CapacityBasedDeterministicHashPolicy { + private final long mTotalCapacity; + + public TestPolicy(AlluxioConfiguration conf, long totalCapacity) { + super(conf); + mTotalCapacity = totalCapacity; + } + + @Override + protected long hashBlockId(long blockId) { + // this simulates a hash function that generates a hash value that is either + // the block id itself, or its 
complement against total capacity + return mTotalCapacity - blockId; + } + + @Override + protected BlockWorkerInfo getRandomCandidate(List candidates) { + // always pick the last candidate + Preconditions.checkArgument(candidates.size() >= 1); + return candidates.get(candidates.size() - 1); + } + } + + // total capacity: 100 + List blockWorkerInfos = ImmutableList.of( + new BlockWorkerInfo(new WorkerNetAddress().setHost("0"), 10, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("1"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("2"), 20, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("3"), 0, 0), + new BlockWorkerInfo(new WorkerNetAddress().setHost("4"), 50, 0) + ); + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockWorkerInfos(blockWorkerInfos) + .setBlockInfo(blockInfo); + + InstancedConfiguration shard4Conf = Configuration.copyGlobal(); + shard4Conf + .set(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, 4); + TestPolicy policyShard4 = new TestPolicy(shard4Conf, 100); + TestPolicy policyShard3 = new TestPolicy(THREE_SHARDS_CONF, 100); + + // for 3 shards policy, the block ids are hashed 3 times, + // therefore the effective hash value is the block id's complement + // for 4 shards policy, the hash value is the same as the block id + blockInfo.setBlockId(1); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("0", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(5); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("0", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(10); + assertEquals("4", policyShard3.getWorker(options).get().getHost()); + assertEquals("1", policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(60); + assertEquals("2", policyShard3.getWorker(options).get().getHost()); + assertEquals("4", 
policyShard4.getWorker(options).get().getHost()); + blockInfo.setBlockId(90); + assertEquals("1", policyShard3.getWorker(options).get().getHost()); + assertEquals("4", policyShard4.getWorker(options).get().getHost()); + } + + /** + * Tests that the probability a worker is chosen is linear to its normalized capacity, + * provided uniform block requests distribution. + */ + @Test + public void linearDistribution() { + final long capacityUpperBound = 1000; + final int numWorkers = 100; + final int numTrials = 100000; + final List capacities = + LongStream.generate(() -> ThreadLocalRandom.current().nextLong(capacityUpperBound)) + .limit(numWorkers).boxed().collect(Collectors.toList()); + final long totalCapacity = capacities.stream().reduce(0L, Long::sum); + + ImmutableMap.Builder workersBuilder = ImmutableMap.builder(); + for (int i = 0; i < numWorkers; i++) { + // used bytes shouldn't matter in case of CapacityBasedDeterministicHashPolicy; + // random number does not affect the outcome of the policy + long randomUsedBytes = ThreadLocalRandom.current().nextLong(); + WorkerNetAddress addr = new WorkerNetAddress().setHost(String.valueOf(i)); + BlockWorkerInfo workerInfo = new BlockWorkerInfo(addr, capacities.get(i), randomUsedBytes); + workersBuilder.put(addr, workerInfo); + } + Map workers = workersBuilder.build(); + + BlockInfo blockInfo = new BlockInfo(); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(ImmutableList.copyOf(workers.values())); + // worker to number of hits map + Map hits = new HashMap<>(); + for (int i = 0; i < numTrials; i++) { + // assume uniform block distribution + blockInfo.setBlockId(ThreadLocalRandom.current().nextLong()); + Optional chosen = THREE_SHARDS_POLICY.getWorker(options); + assertTrue(chosen.isPresent()); + hits.computeIfPresent(chosen.get(), (k, v) -> v + 1); + hits.putIfAbsent(chosen.get(), 1L); + } + // the chance that workers of a particular capacity are chosen converges 
to + // the ratio of their capacity over total capacity, as the number of trials increases + final double confidence = 0.01; + for (Map.Entry entry : hits.entrySet()) { + long capacity = workers.get(entry.getKey()).getCapacityBytes(); + double normalizedCapacity = capacity * 1.0 / totalCapacity; + double normalizedHits = entry.getValue() * 1.0 / numTrials; + assertTrue(Math.abs(normalizedCapacity - normalizedHits) < confidence); + } + } + + /** + * Tests that the outcome of the policy is deterministic if sharding is turned off. + */ + @Test + public void deterministicChoice() { + List workerInfos = generateBlockWorkerInfos(100, 1); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + WorkerNetAddress chosen = NO_SHARDING_POLICY.getWorker(options).get(); + for (int i = 0; i < 10000; i++) { + Optional workerInfo = NO_SHARDING_POLICY.getWorker(options); + assertTrue(workerInfo.isPresent()); + assertEquals(chosen, workerInfo.get()); + } + } + + /** + * Tests that when sharding is enabled (shards >1), the upper bound of the number of all + * possibly selected workers is the configured shards value. + * + * Note: the lower bound is 1. 
+ */ + @Test + public void numShardsDoesNotExceedConfiguredValue() { + List workerInfos = generateBlockWorkerInfos(100, 1); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + for (int numShards = 1; numShards < 20; numShards++) { + InstancedConfiguration conf = Configuration.copyGlobal(); + conf.set(PropertyKey.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS, + numShards); + CapacityBasedDeterministicHashPolicy policy = new CapacityBasedDeterministicHashPolicy(conf); + Set seenWorkers = new HashSet<>(); + for (int i = 0; i < 1000; i++) { + Optional workerInfo = policy.getWorker(options); + assertTrue(workerInfo.isPresent()); + seenWorkers.add(workerInfo.get()); + } + assertTrue(seenWorkers.size() <= numShards); + } + } + + @Test + public void zeroCapacityWorker() { + List workerInfos = generateBlockWorkerInfos(10, 0); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + assertFalse(NO_SHARDING_POLICY.getWorker(options).isPresent()); + } + + /** + * Tests that two workers with the same capacity has a well-defined order, independent of the + * order they are present in the worker list. 
+ */ + @Test + public void stability() { + List workerInfos = new ArrayList<>(generateBlockWorkerInfos(10, 100)); + BlockInfo blockInfo = new BlockInfo().setBlockId(1); + GetWorkerOptions options = GetWorkerOptions.defaults() + .setBlockInfo(blockInfo) + .setBlockWorkerInfos(workerInfos); + assertTrue(NO_SHARDING_POLICY.getWorker(options).isPresent()); + WorkerNetAddress chosen = NO_SHARDING_POLICY.getWorker(options).get(); + for (int i = 0; i < 100; i++) { + Collections.shuffle(workerInfos); + assertTrue(NO_SHARDING_POLICY.getWorker(options).isPresent()); + assertEquals(chosen, NO_SHARDING_POLICY.getWorker(options).get()); + } + } + + /** + * Generates a list of workers with the same capacity, and with the index as its hostname. + */ + private List generateBlockWorkerInfos(int numWorkers, int capacity) { + ImmutableList.Builder workerInfoBuilder = ImmutableList.builder(); + for (int i = 0; i < numWorkers; i++) { + // used bytes shouldn't matter in case of CapacityBasedDeterministicHashPolicy; + // random number does not affect the outcome of the policy + long randomUsedBytes = ThreadLocalRandom.current().nextLong(); + WorkerNetAddress addr = new WorkerNetAddress().setHost(String.valueOf(i)); + BlockWorkerInfo workerInfo = new BlockWorkerInfo(addr, capacity, randomUsedBytes); + workerInfoBuilder.add(workerInfo); + } + return workerInfoBuilder.build(); + } +} diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index df489577043f..6820b5f62b78 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -6602,6 +6602,8 @@ public String toString() { .setDescription(format("When an Alluxio client reads a file from the UFS, it " + "delegates the read to an Alluxio worker. The client uses this policy to choose " + "which worker to read through. 
Built-in choices: %s.", Arrays.asList( + javadocLink("alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy"), + javadocLink("alluxio.client.block.policy.CapacityBaseRandomPolicy"), javadocLink("alluxio.client.block.policy.DeterministicHashPolicy"), javadocLink("alluxio.client.block.policy.LocalFirstAvoidEvictionPolicy"), javadocLink("alluxio.client.block.policy.LocalFirstPolicy"), @@ -6615,8 +6617,9 @@ public String toString() { intBuilder(Name.USER_UFS_BLOCK_READ_LOCATION_POLICY_DETERMINISTIC_HASH_SHARDS) .setDefaultValue(1) .setDescription("When alluxio.user.ufs.block.read.location.policy is set to " - + "alluxio.client.block.policy.DeterministicHashPolicy, this specifies the number of " - + "hash shards.") + + "alluxio.client.block.policy.DeterministicHashPolicy or " + + "alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy, " + + "this specifies the number of hash shards.") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); diff --git a/docs/en/api/Java-API.md b/docs/en/api/Java-API.md index f2a06be954f4..1f8fc629cfdc 100644 --- a/docs/en/api/Java-API.md +++ b/docs/en/api/Java-API.md @@ -271,8 +271,8 @@ The built-in policies include: * [DeterministicHashPolicy](https://docs.alluxio.io/os/javadoc/{{site.ALLUXIO_MAJOR_VERSION}}/alluxio/client/block/policy/DeterministicHashPolicy.html) > This policy maps the blockId to several deterministic Alluxio workers. The number of workers a block - > can be mapped to can be passed through the constructor. The default is 1. It skips the workers - > that do not have enough capacity to hold the block. + > can be mapped to can be specified by `alluxio.user.ufs.block.read.location.policy.deterministic.hash.shards`. + > The default is 1. It skips the workers that do not have enough capacity to hold the block. > > This policy is useful for limiting the amount of replication that occurs when reading blocks from > the UFS with high concurrency. 
With 30 workers and 100 remote clients reading the same block @@ -284,6 +284,42 @@ The built-in policies include: > Note that the hash function relies on the number of workers in the cluster, so if the number of > workers changes, the workers chosen by the policy for a given block will likely change. +* [CapacityBaseRandomPolicy](https://docs.alluxio.io/os/javadoc/{{site.ALLUXIO_MAJOR_VERSION}}/alluxio/client/block/policy/CapacityBaseRandomPolicy.html) + + > This policy chooses a worker with a probability equal to the worker's normalized capacity, + > i.e. the ratio of its capacity over the total capacity of all workers. It randomly distributes + > workload based on the worker capacities so that larger workers get more requests. + > + > This policy is useful for clusters where workers have heterogeneous storage capabilities, but + > the distribution of workload does not match that of storage. For example, in a cluster of 5 + > workers, one of the workers has only half the capacity of the others, however, it is co-located + > with a client that generates twice the amount of read requests than others. In this scenario, + > the default LocalFirstPolicy will quickly cause the smaller worker to go out of space, while + > the larger workers have plenty of storage left unused. Although the client will retry with a + > different worker when the local worker is out of space, this will increase I/O latency. + > + > Note that the randomness is + > based on capacity instead of availability, because in the long run, all workers will be + > filled up and have availability close to 0, which would cause this policy to degrade to a + > uniformly distributed random policy. + +* [CapacityBasedDeterministicHashPolicy](https://docs.alluxio.io/os/javadoc/{{site.ALLUXIO_MAJOR_VERSION}}/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.html) + + > This policy is a combination of DeterministicHashPolicy and CapacityBaseRandomPolicy. 
+ > It ensures each block is always assigned to the same set of workers. Additionally, provided + > that block requests follow a uniform distribution, they are assigned to each worker with a probability + > equal to the worker's normalized capacity. The number of workers that a block can be assigned + > to can be specified by `alluxio.user.ufs.block.read.location.policy.deterministic.hash.shards`. + > + > This policy is useful when CapacityBaseRandomPolicy causes too many replicas across multiple + > workers, and one wishes to limit the amount of replication, in a way similar to + > DeterministicHashPolicy. + > + > Note that this is not a random policy in itself. The outcome distribution of this policy is + > dependent on the distribution of the block requests. When the distribution of block + > requests is highly skewed, the workers chosen will not follow a distribution based on workers' + > normalized capacities. + Alluxio supports custom policies, so you can also develop your own policy appropriate for your workload by implementing the interface `alluxio.client.block.policy.BlockLocationPolicy`. Note that a default policy must have a constructor which takes `alluxio.conf.AlluxioConfiguration`. From a02c5f6ab9d9976aa300101da7679c6eab8846ab Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Thu, 23 Feb 2023 14:53:34 -0800 Subject: [PATCH 138/334] Fix incorrect flag passing into delete op ### What changes are proposed in this pull request? Incorrect usage of flag in atomic rename for the final step of completing the target multipart-upload file. ### Why are the changes needed? if write type is cache_thru or thru, the atomic rename ( delete target and rename src to target ) will incorrectly delete alluxio-only instead of deleting UFS, hence making the renaming op in UFS fail. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16941 change-id: cid-b38904c24dee066adac2f854127f2d877bd21dcd --- .../main/java/alluxio/master/file/DefaultFileSystemMaster.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index b82e61513696..897442c9f14c 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -3119,7 +3119,7 @@ private boolean checkForOverwriteSyntax(RpcContext rpcContext, try { deleteInternal(rpcContext, dstInodePath, DeleteContext .mergeFrom(DeletePOptions.newBuilder() - .setRecursive(true).setAlluxioOnly(context.getPersist())), true); + .setRecursive(true).setAlluxioOnly(!context.getPersist())), true); dstInodePath.removeLastInode(); } catch (DirectoryNotEmptyException ex) { // IGNORE, this will never happen From 8ef0a4d9ed7e10a5c5640969f14f12d31f3ea732 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 24 Feb 2023 10:19:47 +0800 Subject: [PATCH 139/334] Fix metadata sync behavior when descendant type is NONE ### What changes are proposed in this pull request? When the metadata sync descendant type is NONE, stop loading the children of the sync root. If a metadata sync is triggered by a GetStatus() call on a directory Previous behavior: The directory itself, as well as all its sub directories in the inode store will be synced. New behavior: ONLY the directory itself will be loaded. ### Why are the changes needed? This PR addresses https://github.com/Alluxio/alluxio/issues/16922. The incorrect metadata sync behavior on GetStatus for a directory loads more children of the directory than expected and puts a lot of pressure on the UFS side. ### Does this PR introduce any user facing changes? Yes. The metadata sync behavior has been changed. 
See the comment above. The previous behavior was actually wrong and we added a hidden feature flag to allow customers to fallback. pr-link: Alluxio/alluxio#16935 change-id: cid-2a5a2b4959422ecff74149881e400659d07c2163 --- .../main/java/alluxio/conf/PropertyKey.java | 14 ++++ .../alluxio/master/file/InodeSyncStream.java | 10 +++ .../FileSystemMasterSyncMetadataTest.java | 67 +++++++++++++++++++ .../client/fs/UfsSyncIntegrationTest.java | 10 ++- 4 files changed, 99 insertions(+), 2 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 6820b5f62b78..f5612b790397 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -3650,6 +3650,18 @@ public String toString() { .setIsHidden(true) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .build(); + public static final PropertyKey + MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN = + booleanBuilder(Name.MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN) + .setDescription( + "If set to true, skip loading children during metadata sync when " + + "descendant type is set to NONE, for example, a metadata sync triggered " + + "by a getStatus on a directory.") + .setScope(Scope.MASTER) + .setDefaultValue(true) + .setIsHidden(true) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); // In Java8 in container environment Runtime.availableProcessors() always returns 1, // which is not the actual number of cpus, so we set a safe default value 32. 
public static final PropertyKey MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = @@ -7925,6 +7937,8 @@ public static final class Name { "alluxio.master.metadata.sync.instrument.executor"; public static final String MASTER_METADATA_SYNC_REPORT_FAILURE = "alluxio.master.metadata.sync.report.failure"; + public static final String MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN = + "alluxio.master.metadata.sync.get.directory.status.skip.loading.children"; public static final String MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = "alluxio.master.metadata.sync.ufs.prefetch.pool.size"; public static final String MASTER_METADATA_SYNC_TRAVERSAL_ORDER = diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index 10770e7ba740..4d67ebd9f4e3 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -296,6 +296,10 @@ public enum SyncStatus { private final int mConcurrencyLevel = Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_CONCURRENCY_LEVEL); + private final boolean mGetDirectoryStatusSkipLoadingChildren = + Configuration.getBoolean( + PropertyKey.MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN); + private final FileSystemMasterAuditContext mAuditContext; private final Function mAuditContextSrcInodeFunc; @@ -479,6 +483,10 @@ private SyncStatus syncInternal() throws // If descendantType is ONE, then we shouldn't process any more paths except for those // currently in the queue stopNum = mPendingPaths.size(); + } else if (mGetDirectoryStatusSkipLoadingChildren && mDescendantType == DescendantType.NONE) { + // If descendantType is NONE, do not process any path in the queue after + // the inode itself is loaded. 
+ stopNum = 0; } // process the sync result for the original path @@ -899,6 +907,8 @@ private void syncExistingInodeMetadata( if (mDescendantType == DescendantType.ONE) { syncChildren = syncChildren && mRootScheme.getPath().equals(inodePath.getUri()); + } else if (mDescendantType == DescendantType.NONE && mGetDirectoryStatusSkipLoadingChildren) { + syncChildren = false; } int childCount = inode.isDirectory() ? (int) inode.asDirectory().getChildCount() : 0; diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java index 8272371b7d69..03d1d64e6b80 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterSyncMetadataTest.java @@ -325,6 +325,73 @@ public void deleteAlluxioOnlyNoSync() throws Exception { assertFalse(delegateMaster.mSynced.get()); } + /** + * Tests the getStatus operation does not trigger a metadata sync that loads its children. 
+ */ + @Test + public void getStatusOnDirectory() throws Exception { + AlluxioURI ufsMount = setupMockUfsS3Mount(); + short mode = ModeUtils.getUMask("0700").toShort(); + + // Mock dir1 ufs path + AlluxioURI dir1Path = ufsMount.join("dir1"); + UfsDirectoryStatus dir1Status = new UfsDirectoryStatus(dir1Path.getPath(), "", "", mode); + Mockito.when(mUfs.getParsedFingerprint(dir1Path.toString())) + .thenReturn(Fingerprint.create("s3", dir1Status)); + Mockito.when(mUfs.exists(dir1Path.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(dir1Path.toString())).thenReturn(true); + Mockito.when(mUfs.isFile(dir1Path.toString())).thenReturn(false); + Mockito.when(mUfs.getStatus(dir1Path.toString())).thenReturn(dir1Status); + Mockito.when(mUfs.getDirectoryStatus(dir1Path.toString())).thenReturn(dir1Status); + + // Mock nested ufs path /dir1/dir2 + AlluxioURI nestedDirectoryPath = ufsMount.join("dir1").join("dir2"); + UfsDirectoryStatus nestedDirStatus = + new UfsDirectoryStatus(dir1Path.getPath(), "", "", mode); + + Mockito.when(mUfs.getParsedFingerprint(nestedDirectoryPath.toString())) + .thenReturn(Fingerprint.create("s3", nestedDirStatus)); + Mockito.when(mUfs.exists(nestedDirectoryPath.toString())).thenReturn(true); + Mockito.when(mUfs.isDirectory(nestedDirectoryPath.toString())).thenReturn(true); + Mockito.when(mUfs.isFile(nestedDirectoryPath.toString())).thenReturn(false); + Mockito.when(mUfs.getStatus(nestedDirectoryPath.toString())).thenReturn(nestedDirStatus); + Mockito.when(mUfs.getDirectoryStatus(nestedDirectoryPath.toString())) + .thenReturn(nestedDirStatus); + + // Mock creating the same directory and nested file in UFS out of band + AlluxioURI dir1 = new AlluxioURI("/mnt/local/dir1"); + AlluxioURI dir2 = new AlluxioURI("/mnt/local/dir1/dir2"); + Mockito.when(mUfs.listStatus(eq(dir1Path.toString()))) + .thenReturn(new UfsStatus[]{new UfsDirectoryStatus("dir2", "", "", mode)}); + Mockito.when(mUfs.listStatus(eq(nestedDirectoryPath.toString()))) + 
.thenReturn(new UfsStatus[]{}); + + // List the nested directory + // listStatus is called on UFS /dir1/dir2 + mFileSystemMaster.listStatus(dir2, ListStatusContext.mergeFrom( + ListStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + Mockito.verify(mUfs, Mockito.times(0)) + .listStatus(eq(dir1Path.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .listStatus(eq(nestedDirectoryPath.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .getStatus(eq(nestedDirectoryPath.toString())); + + // Get the file info of the directory /dir1 + // listStatus is called on UFS /dir1/dir2 + // Make sure there is neither list nor get on UFS /dir1/dir2 + mFileSystemMaster.getFileInfo(dir1, GetStatusContext.mergeFrom( + GetStatusPOptions.newBuilder().setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build()))); + Mockito.verify(mUfs, Mockito.times(0)) + .listStatus(eq(dir1Path.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .listStatus(eq(nestedDirectoryPath.toString())); + Mockito.verify(mUfs, Mockito.times(1)) + .getStatus(eq(nestedDirectoryPath.toString())); + } + private static class SyncAwareFileSystemMaster extends DefaultFileSystemMaster { AtomicBoolean mSynced = new AtomicBoolean(false); diff --git a/tests/src/test/java/alluxio/client/fs/UfsSyncIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/UfsSyncIntegrationTest.java index b290a4655379..9914cb0c2f93 100644 --- a/tests/src/test/java/alluxio/client/fs/UfsSyncIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/UfsSyncIntegrationTest.java @@ -863,14 +863,20 @@ public void deleteUfsFileGetStatus() throws Exception { // delete the file and wait a bit new File(ufsPath("/delete/file")).delete(); - CommonUtils.sleepMs(2000); + CommonUtils.sleepMs(3000); // getStatus (not listStatus) on the root, with a shorter interval than the sleep. // This will sync that directory. 
The sync interval has to be long enough for the internal // syncing process to finish within that time. mFileSystem.getStatus(new AlluxioURI(alluxioPath("/delete")), GetStatusPOptions.newBuilder() .setCommonOptions( - FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(1000).build()).build()); + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(2000).build()).build()); + + // a following list status should trigger a metadata sync even though the path was just synced, + // because the descendant type is ONE this time, and it was NONE previously. + mFileSystem.listStatus(new AlluxioURI(alluxioPath("/delete")), + ListStatusPOptions.newBuilder().setRecursive(false).setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(2000).build()).build()); // verify that the file is deleted, without syncing try { From 669f80e612f2086d1ded170cfc09cdddc3ffbc2c Mon Sep 17 00:00:00 2001 From: fanyang <42925539+fffanyang@users.noreply.github.com> Date: Fri, 24 Feb 2023 11:01:45 +0800 Subject: [PATCH 140/334] Support remove blocks on worker for pagestore when free/delete file ### What changes are proposed in this pull request? Support removeBlock for pagestore. ### Why are the changes needed? To remove metadata and data of blocks and pages on worker when free or delete a file. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16895 change-id: cid-a6cc6c0074907f62b9778a8a1cfc0e9f61e74135 --- .../alluxio/worker/page/PagedBlockStore.java | 47 ++++++++++++++++--- .../worker/page/PagedBlockStoreDir.java | 12 +++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index a3be0892c6ad..2dba6f52ce61 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -16,14 +16,19 @@ import alluxio.client.file.cache.CacheManager; import alluxio.client.file.cache.CacheManagerOptions; +import alluxio.client.file.cache.PageId; +import alluxio.client.file.cache.PageInfo; import alluxio.client.file.cache.store.PageStoreDir; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.exception.BlockAlreadyExistsException; import alluxio.exception.ExceptionMessage; +import alluxio.exception.PageNotFoundException; import alluxio.exception.runtime.AlluxioRuntimeException; import alluxio.exception.runtime.AlreadyExistsRuntimeException; import alluxio.exception.runtime.BlockDoesNotExistRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.status.DeadlineExceededException; import alluxio.grpc.Block; import alluxio.grpc.BlockStatus; import alluxio.grpc.ErrorType; @@ -62,6 +67,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; /** @@ -85,6 +91,7 @@ public class PagedBlockStore implements BlockStore { private final List mBlockStoreEventListeners = new CopyOnWriteArrayList<>(); private final long mPageSize; + private static final Long REMOVE_BLOCK_TIMEOUT_MS = 60_000L; /** * Create 
an instance of PagedBlockStore. @@ -396,16 +403,13 @@ public void moveBlock(long sessionId, long blockId, AllocateOptions moveOptions) @Override public void removeBlock(long sessionId, long blockId) throws IOException { LOG.debug("removeBlock: sessionId={}, blockId={}", sessionId, blockId); - // TODO(bowen): implement actual removal and replace placeholder values - boolean removeSuccess = true; int dirIndex = getDirIndexOfBlock(blockId); + removeBlockInternal(sessionId, blockId, REMOVE_BLOCK_TIMEOUT_MS); for (BlockStoreEventListener listener : mBlockStoreEventListeners) { synchronized (listener) { listener.onRemoveBlockByClient(blockId); - if (removeSuccess) { - BlockStoreLocation removedFrom = new BlockStoreLocation(DEFAULT_TIER, dirIndex); - listener.onRemoveBlock(blockId, removedFrom); - } + BlockStoreLocation removedFrom = new BlockStoreLocation(DEFAULT_TIER, dirIndex); + listener.onRemoveBlock(blockId, removedFrom); } } } @@ -507,4 +511,35 @@ private int getDirIndexOfBlock(long blockId) { .getDir() .getDirIndex(); } + + private void removeBlockInternal(long sessionId, long blockId, long timeoutMs) + throws IOException { + Optional optionalLock = + mLockManager.tryAcquireBlockLock(sessionId, blockId, BlockLockType.WRITE, + timeoutMs, TimeUnit.MILLISECONDS); + if (!optionalLock.isPresent()) { + throw new DeadlineExceededException( + String.format("Can not acquire lock to remove block %d for session %d after %d ms", + blockId, sessionId, timeoutMs)); + } + try (BlockLock blockLock = optionalLock.get()) { + Set pageIds; + try (LockResource metaLock = new LockResource(mPageMetaStore.getLock().writeLock())) { + if (mPageMetaStore.hasTempBlock(blockId)) { + throw new IllegalStateException( + ExceptionMessage.REMOVE_UNCOMMITTED_BLOCK.getMessage(blockId)); + } + pageIds = mPageMetaStore.getBlock(blockId) + .orElseThrow(() -> new BlockDoesNotExistRuntimeException(blockId)) + .getDir().getBlockPages(blockId); + + for (PageId pageId : pageIds) { + PageInfo pageInfo = 
mPageMetaStore.removePage(pageId); + pageInfo.getLocalCacheDir().getPageStore().delete(pageId); + } + } + } catch (PageNotFoundException e) { + throw new NotFoundRuntimeException("Page not found: " + e.getMessage(), e); + } + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java index d1ca6ad31dfc..b5d232651200 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStoreDir.java @@ -14,6 +14,7 @@ import static alluxio.worker.page.PagedBlockStoreMeta.DEFAULT_MEDIUM; import static alluxio.worker.page.PagedBlockStoreMeta.DEFAULT_TIER; +import alluxio.client.file.cache.PageId; import alluxio.client.file.cache.PageInfo; import alluxio.client.file.cache.PageStore; import alluxio.client.file.cache.store.PageStoreDir; @@ -265,4 +266,15 @@ public long getTempBlockCachedBytes(long blockId) { public int getBlockCachedPages(long blockId) { return mBlockToPagesMap.get(blockId).size(); } + + /** + * Gets pages in this block. + * + * @param blockId the block id + * @return pages in this block being cached + */ + public Set getBlockPages(long blockId) { + return mBlockToPagesMap.get(blockId).stream().map(PageInfo::getPageId) + .collect(Collectors.toSet()); + } } From cbff62a130e319e11179eea3df0265dfa130acc1 Mon Sep 17 00:00:00 2001 From: bingzheng Date: Fri, 24 Feb 2023 15:06:47 +0800 Subject: [PATCH 141/334] Fix the client stressbench concurrency problem ### What changes are proposed in this pull request? Fix client stressbench concurrency problem. ### Why are the changes needed? 
Reproduce: Cluster: 1 master and 2 workers Command: `bin/alluxio runClass alluxio.stress.cli.client.StressClientIOBench --operation Write --base alluxio:///stress-client-io-base --write-num-workers 2 --file-size 1m --threads 8` Result: user_root.log image ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16934 change-id: cid-10a6a78a50a8a8bcac455a1bc7f7d1fe43c3642a --- .../java/alluxio/stress/cli/client/ClientIOWritePolicy.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stress/shell/src/main/java/alluxio/stress/cli/client/ClientIOWritePolicy.java b/stress/shell/src/main/java/alluxio/stress/cli/client/ClientIOWritePolicy.java index 65b485567166..14a7cab7208a 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/client/ClientIOWritePolicy.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/client/ClientIOWritePolicy.java @@ -65,7 +65,7 @@ public ClientIOWritePolicy(AlluxioConfiguration ignoredConf) {} * @return the address of the worker to write to */ @Override - public Optional getWorker(GetWorkerOptions options) { + public synchronized Optional getWorker(GetWorkerOptions options) { Map eligibleWorkers = new HashMap<>(); for (BlockWorkerInfo info : options.getBlockWorkerInfos()) { eligibleWorkers.put(info.getNetAddress(), info); From 7ee4b748c142f00abc09460b4663c0bf372c2456 Mon Sep 17 00:00:00 2001 From: bingzheng Date: Fri, 24 Feb 2023 15:09:15 +0800 Subject: [PATCH 142/334] [DOCFIX] Fix the wrong option in stress bench doc ### What changes are proposed in this pull request? Fix the wrong option in stress bench doc ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16925 change-id: cid-923b234e2c29b132b5fbafdfdb64b9903ae5a464 --- docs/en/administration/StressBench.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/administration/StressBench.md b/docs/en/administration/StressBench.md index fb463b9c3905..ffa03bd58762 100644 --- a/docs/en/administration/StressBench.md +++ b/docs/en/administration/StressBench.md @@ -220,7 +220,7 @@ $ bin/alluxio runClass alluxio.stress.cli.fuse.FuseIOBench --operation LocalRead For example, ```console $ bin/alluxio runClass alluxio.stress.cli.fuse.FuseIOBench --operation LocalRead --local-path /mnt/FuseIOBenchRead --num-dirs 128 \ ---num-files-per-dir 100 --files-size 1m --buffer-size 512k --warmup 5s --duration 30s +--num-files-per-dir 100 --file-size 1m --buffer-size 512k --warmup 5s --duration 30s ``` #### Demo @@ -498,7 +498,7 @@ $ bin/alluxio runClass alluxio.stress.cli.client.StressClientIOBench --operation For example, we are testing Streaming read api, using byte buffers with buffer size 512k. ```console -$ bin/alluxio runClass alluxio.stress.cli.client.StressClientIOBench --operation ReadByteBuffer --files-size 1m --buffer-size 512k --warmup 5s --duration 30s +$ bin/alluxio runClass alluxio.stress.cli.client.StressClientIOBench --operation ReadByteBuffer --file-size 1m --buffer-size 512k --warmup 5s --duration 30s ``` ### Cluster testing From c4b9ecf7a9a4bbbec66116ad82918aabe00b18ea Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sun, 26 Feb 2023 19:17:03 +0800 Subject: [PATCH 143/334] Fix build error when revision is shorter than 8 chars It will through ArrayOutOfBoundException if we build alluxio in a node which have no git installed, and the `VERSION` would be shorter than 8, even empty string, so substring(8) cannot work anymore. 
This PR check there are more than 8 chars in the `VERSION` first, otherwise, do not cut the `VERSION` string. pr-link: Alluxio/alluxio#16888 change-id: cid-4e02cc9214317d86bba9d00a6121c5f013dd3255 --- core/common/src/main/java/alluxio/RuntimeConstants.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/RuntimeConstants.java b/core/common/src/main/java/alluxio/RuntimeConstants.java index 8480b95cd0e8..8c0e21cfb696 100644 --- a/core/common/src/main/java/alluxio/RuntimeConstants.java +++ b/core/common/src/main/java/alluxio/RuntimeConstants.java @@ -36,7 +36,8 @@ public final class RuntimeConstants { } } - public static final String REVISION_SHORT = ProjectConstants.REVISION.substring(0, 8); + public static final String REVISION_SHORT = ProjectConstants.REVISION.length() > 8 + ? ProjectConstants.REVISION.substring(0, 8) : ProjectConstants.REVISION; public static final String VERSION_AND_REVISION_SHORT = VERSION + "-" + REVISION_SHORT; From 3f8c79cdc91ce78da675f314c509d55893a143a0 Mon Sep 17 00:00:00 2001 From: tian bao <2011xuesong@gmail.com> Date: Sun, 26 Feb 2023 19:37:24 +0800 Subject: [PATCH 144/334] Improve LS by allowing to omit UFS and mount info ### What changes are proposed in this pull request? Improve listStatus 5X performance in some scenarios. ### Why are the changes needed? For instance, Under the scenario: 1. Use Hadoop compatible system to access Alluxio(listStatus) 2. There are many mount point such as more than 500. 3. There are more than 2000 files in a directory. The PathUtils.hasPrefix (comes from MountTable.getMountPoint)method will be called at least 10w (500 * 2000) times. But actually we don't need the information of mount point. The test can be reduced from about 400ms to 70ms under the master branch. The test can be reduced from about 700ms to about 100ms under 2.7.1 branch. ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including no pr-link: Alluxio/alluxio#16893 change-id: cid-71d9a351744033426fb1b0633d7b194f94f322b5 --- .../alluxio/hadoop/AbstractFileSystem.java | 8 ++- .../hadoop/AbstractFileSystemTest.java | 62 ++++++++++++++++++- .../main/java/alluxio/conf/PropertyKey.java | 10 +++ .../master/file/DefaultFileSystemMaster.java | 54 +++++++++------- .../main/proto/grpc/file_system_master.proto | 5 ++ .../alluxio/cli/fs/command/LsCommand.java | 18 +++++- 6 files changed, 127 insertions(+), 30 deletions(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java index 41bdb08302e8..aa385b0a3aef 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java @@ -31,6 +31,7 @@ import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; +import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.master.MasterInquireClient.Factory; import alluxio.security.CurrentUser; @@ -87,6 +88,7 @@ public abstract class AbstractFileSystem extends org.apache.hadoop.fs.FileSystem private Path mWorkingDir = new Path(AlluxioURI.SEPARATOR); private Statistics mStatistics = null; private String mAlluxioHeader = null; + private boolean mExcludeMountInfoOnListStatus; /** * Constructs a new {@link AbstractFileSystem} instance with specified a {@link FileSystem} @@ -505,6 +507,8 @@ public synchronized void initialize(URI uri, org.apache.hadoop.conf.Configuratio // Creating a new instanced configuration from an AlluxioProperties object isn't expensive. 
mAlluxioConf = new InstancedConfiguration(alluxioProps); mAlluxioConf.validate(); + mExcludeMountInfoOnListStatus = mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS); if (mFileSystem != null) { return; @@ -580,7 +584,9 @@ public FileStatus[] listStatus(Path path) throws IOException { AlluxioURI uri = getAlluxioPath(path); List statuses; try { - statuses = mFileSystem.listStatus(uri); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(mExcludeMountInfoOnListStatus).build(); + statuses = mFileSystem.listStatus(uri, listStatusPOptions); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(getAlluxioPath(path).toString()); } catch (AlluxioException e) { diff --git a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java index 158643a9a84e..221f5a7947d1 100644 --- a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java +++ b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java @@ -39,6 +39,7 @@ import alluxio.conf.PropertyKey; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; +import alluxio.grpc.ListStatusPOptions; import alluxio.util.ConfigurationUtils; import alluxio.wire.BlockInfo; import alluxio.wire.FileBlockInfo; @@ -406,9 +407,58 @@ public void listStatus() throws Exception { Path path = new Path("/dir"); alluxio.client.file.FileSystem alluxioFs = mock(alluxio.client.file.FileSystem.class); - when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) + FileSystem alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + alluxioHadoopFs.initialize(uri, getConf()); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + 
.setExcludeMountInfo(alluxioHadoopFs.mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS)).build(); + when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) .thenReturn(Lists.newArrayList(new URIStatus(fileInfo1), new URIStatus(fileInfo2))); + + FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); + assertFileInfoEqualsFileStatus(fileInfo1, fileStatuses[0]); + assertFileInfoEqualsFileStatus(fileInfo2, fileStatuses[1]); + alluxioHadoopFs.close(); + } + + /** + * Tests that the {@link AbstractFileSystem#listStatus(Path)} method uses + * {@link URIStatus#getLastModificationTimeMs()} correctly without mount info. + */ + @Test + public void listStatusWithoutMountInfo() throws Exception { + FileInfo fileInfo1 = new FileInfo() + .setLastModificationTimeMs(111L) + .setLastAccessTimeMs(123L) + .setFolder(false) + .setOwner("user1") + .setGroup("group1") + .setMode(00755); + FileInfo fileInfo2 = new FileInfo() + .setLastModificationTimeMs(222L) + .setLastAccessTimeMs(234L) + .setFolder(true) + .setOwner("user2") + .setGroup("group2") + .setMode(00644); + + Path path = new Path("/dir"); + alluxio.client.file.FileSystem alluxioFs = + mock(alluxio.client.file.FileSystem.class); FileSystem alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + Configuration configuration = getConf(); + configuration.setBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS.getName(), + true); + alluxioHadoopFs.initialize(uri, configuration); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(true).build(); + when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) + .thenReturn(Lists.newArrayList(new URIStatus(fileInfo1), new URIStatus(fileInfo2))); FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); 
assertFileInfoEqualsFileStatus(fileInfo1, fileStatuses[0]); @@ -426,9 +476,15 @@ public void throwFileNotFoundExceptionWhenListStatusNonExistingTest() throws Exc try { Path path = new Path("/ALLUXIO-2036"); alluxio.client.file.FileSystem alluxioFs = mock(alluxio.client.file.FileSystem.class); - when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)))) - .thenThrow(new FileNotFoundException("ALLUXIO-2036 not Found")); alluxioHadoopFs = new FileSystem(alluxioFs); + URI uri = URI.create(Constants.HEADER + "host:1"); + alluxioHadoopFs.initialize(uri, getConf()); + ListStatusPOptions listStatusPOptions = ListStatusPOptions.getDefaultInstance().toBuilder() + .setExcludeMountInfo(alluxioHadoopFs.mAlluxioConf.getBoolean( + PropertyKey.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS)).build(); + when(alluxioFs.listStatus(new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)), + listStatusPOptions)) + .thenThrow(new FileNotFoundException("ALLUXIO-2036 not Found")); FileStatus[] fileStatuses = alluxioHadoopFs.listStatus(path); // if we reach here, FileNotFoundException is not thrown hence Fail the test case assertTrue(false); diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index f5612b790397..caf467b07713 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -6104,6 +6104,14 @@ public String toString() { + "when Alluxio workers are required but not ready.") .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS = + booleanBuilder(Name.USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS) + .setDefaultValue(false) + .setDescription("If enabled, the mount info will be excluded from the response " + + "when a HDFS client calls alluxio to list status on a directory.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + 
.setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_LOCAL_READER_CHUNK_SIZE_BYTES = dataSizeBuilder(Name.USER_LOCAL_READER_CHUNK_SIZE_BYTES) .setDefaultValue("8MB") @@ -8658,6 +8666,8 @@ public static final class Name { public static final String USER_FILE_WRITE_INIT_MAX_DURATION = "alluxio.user.file.write.init.max.duration"; public static final String USER_HOSTNAME = "alluxio.user.hostname"; + public static final String USER_HDFS_CLIENT_EXCLUDE_MOUNT_INFO_ON_LIST_STATUS = + "alluxio.user.hdfs.client.exclude.mount.info.on.list.status"; public static final String USER_LOCAL_READER_CHUNK_SIZE_BYTES = "alluxio.user.local.reader.chunk.size.bytes"; public static final String USER_LOCAL_WRITER_CHUNK_SIZE_BYTES = diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 897442c9f14c..b67d1e000ffd 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -973,14 +973,15 @@ public long getMountIdFromUfsPath(AlluxioURI ufsPath) { private FileInfo getFileInfoInternal(LockedInodePath inodePath) throws UnavailableException, FileDoesNotExistException { - return getFileInfoInternal(inodePath, null); + return getFileInfoInternal(inodePath, null, false); } /** * @param inodePath the {@link LockedInodePath} to get the {@link FileInfo} for * @return the {@link FileInfo} for the given inode */ - private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) + private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter, + boolean excludeMountInfo) throws FileDoesNotExistException, UnavailableException { int inMemoryPercentage; int inAlluxioPercentage; @@ -1027,20 +1028,22 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter) } } 
fileInfo.setXAttr(inode.getXAttr()); - MountTable.Resolution resolution; - try { - resolution = mMountTable.resolve(uri); - } catch (InvalidPathException e) { - throw new FileDoesNotExistException(e.getMessage(), e); - } - AlluxioURI resolvedUri = resolution.getUri(); - fileInfo.setUfsPath(resolvedUri.toString()); - fileInfo.setMountId(resolution.getMountId()); - if (counter == null) { - Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), - Metrics.UFSOps.GET_FILE_INFO).inc(); - } else { - counter.inc(); + if (!excludeMountInfo) { + MountTable.Resolution resolution; + try { + resolution = mMountTable.resolve(uri); + } catch (InvalidPathException e) { + throw new FileDoesNotExistException(e.getMessage(), e); + } + AlluxioURI resolvedUri = resolution.getUri(); + fileInfo.setUfsPath(resolvedUri.toString()); + fileInfo.setMountId(resolution.getMountId()); + if (counter == null) { + Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), + Metrics.UFSOps.GET_FILE_INFO).inc(); + } else { + counter.inc(); + } } Metrics.FILE_INFOS_GOT.inc(); @@ -1146,13 +1149,15 @@ public void listStatus(AlluxioURI path, ListStatusContext context, ensureFullPathAndUpdateCache(inodePath); auditContext.setSrcInode(inodePath.getInode()); - MountTable.Resolution resolution; + MountTable.Resolution resolution = null; if (!context.getOptions().hasLoadMetadataOnly() || !context.getOptions().getLoadMetadataOnly()) { DescendantType descendantTypeForListStatus = (context.getOptions().getRecursive()) ? 
DescendantType.ALL : DescendantType.ONE; try { - resolution = mMountTable.resolve(path); + if (!context.getOptions().getExcludeMountInfo()) { + resolution = mMountTable.resolve(path); + } } catch (InvalidPathException e) { throw new FileDoesNotExistException(e.getMessage(), e); } @@ -1172,11 +1177,11 @@ public void listStatus(AlluxioURI path, ListStatusContext context, } // perform the listing listStatusInternal(context, rpcContext, inodePath, auditContext, - descendantTypeForListStatus, resultStream, 0, - Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), - Metrics.UFSOps.GET_FILE_INFO), + descendantTypeForListStatus, resultStream, 0, resolution == null ? null : + Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), + Metrics.UFSOps.GET_FILE_INFO), partialPathNames, prefixComponents); - if (!ufsAccessed) { + if (!ufsAccessed && resolution != null) { Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), Metrics.UFSOps.LIST_STATUS).inc(); } @@ -1219,7 +1224,7 @@ public List listStatus(AlluxioURI path, ListStatusContext context) private void listStatusInternal( ListStatusContext context, RpcContext rpcContext, LockedInodePath currInodePath, AuditContext auditContext, DescendantType descendantType, ResultStream resultStream, - int depth, Counter counter, List partialPath, + int depth, @Nullable Counter counter, List partialPath, List prefixComponents) throws FileDoesNotExistException, UnavailableException, AccessControlException, InvalidPathException { @@ -1242,7 +1247,8 @@ private void listStatusInternal( // at this depth. 
if ((depth != 0 || inode.isFile()) && prefixComponents.size() <= depth) { if (context.listedItem()) { - resultStream.submit(getFileInfoInternal(currInodePath, counter)); + resultStream.submit(getFileInfoInternal(currInodePath, counter, + context.getOptions().getExcludeMountInfo())); } if (context.isDoneListing()) { return; diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 11409ac5a83b..558ae24f6217 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -238,6 +238,11 @@ message ListStatusPOptions { // being loaded. It is recommended to set this to true after the first call of a // recursive partial listing. optional bool disableAreDescendantsLoadedCheck = 6; + // Mount info will be excluded from the list status response if this field is set to true. + // Resolving a path and obtain the mount info is an expensive operation. + // For clients that do not need this information such as hadoop-compatible clients, + // excluding mount info improves the endpoint performance. 
+ optional bool excludeMountInfo = 7; } message ListStatusPRequest { /** the path of the file or directory */ diff --git a/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java b/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java index e03b623a996e..6b6cfc7d228d 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java @@ -117,6 +117,15 @@ public final class LsCommand extends AbstractFileSystemCommand { .desc("list all pinned files") .build(); + private static final Option OMIT_MOUNT_INFO = + Option.builder("m") + .required(false) + .longOpt("omit-mount-info") + .hasArg(false) + .desc("if specified, the status will not include mount point related information, " + + "like the UFS path") + .build(); + private static final Option RECURSIVE_OPTION = Option.builder("R").longOpt("recursive") .required(false) @@ -235,6 +244,7 @@ public Options getOptions() { .addOption(LIST_DIR_AS_FILE_OPTION) .addOption(LIST_HUMAN_READABLE_OPTION) .addOption(LIST_PINNED_FILES_OPTION) + .addOption(OMIT_MOUNT_INFO) .addOption(RECURSIVE_OPTION) .addOption(REVERSE_SORT_OPTION) .addOption(SORT_OPTION) @@ -249,9 +259,11 @@ public Options getOptions() { * @param dirAsFile list the directory status as a plain file * @param hSize print human-readable format sizes * @param sortField sort the result by this field + * @param excludeMountInfo if enabled, the mount info will be excluded from the response */ private void ls(AlluxioURI path, boolean recursive, boolean forceLoadMetadata, boolean dirAsFile, - boolean hSize, boolean pinnedOnly, String sortField, boolean reverse, String timestampOption) + boolean hSize, boolean pinnedOnly, String sortField, boolean reverse, String timestampOption, + boolean excludeMountInfo) throws AlluxioException, IOException { Function timestampFunction = TIMESTAMP_FIELDS.get(timestampOption); if (dirAsFile) { @@ -265,6 +277,7 @@ private void ls(AlluxioURI path, boolean recursive, 
boolean forceLoadMetadata, b optionsBuilder.setLoadMetadataType(LoadMetadataPType.ALWAYS); } optionsBuilder.setRecursive(recursive); + optionsBuilder.setExcludeMountInfo(excludeMountInfo); if (sortField == null) { mFileSystem.iterateStatus(path, optionsBuilder.build(), @@ -303,7 +316,8 @@ protected void runPlainPath(AlluxioURI path, CommandLine cl) ls(path, cl.hasOption(RECURSIVE_OPTION.getOpt()), cl.hasOption("f"), cl.hasOption("d"), cl.hasOption("h"), cl.hasOption("p"), cl.getOptionValue("sort", null), cl.hasOption("r"), - cl.getOptionValue("timestamp", "lastModificationTime")); + cl.getOptionValue("timestamp", "lastModificationTime"), + cl.hasOption("m") || cl.hasOption("omit-mount-info")); } @Override From db462bd7f306ea2f6223f5a35fc12dca480f14f6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Feb 2023 12:08:41 -0800 Subject: [PATCH 145/334] Bump golang.org/x/net to 0.7.0 in /integration/docker/csi Bumps [golang.org/x/net](https://github.com/golang/net) from 0.0.0-20210510120150-4163338589ed to 0.7.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/net&package-manager=go_modules&previous-version=0.0.0-20210510120150-4163338589ed&new-version=0.7.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#16947 change-id: cid-6d9f72601394aed56df5d25eeee43eeed668a502 --- integration/docker/csi/go.mod | 6 ++++- integration/docker/csi/go.sum | 46 ++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/integration/docker/csi/go.mod b/integration/docker/csi/go.mod index f4940343b0af..32749147576c 100644 --- a/integration/docker/csi/go.mod +++ b/integration/docker/csi/go.mod @@ -7,8 +7,12 @@ require ( github.com/golang/glog v0.0.0-20210429001901-424d2337a529 github.com/kubernetes-csi/csi-lib-utils v0.7.0 // indirect github.com/kubernetes-csi/drivers v1.0.2 + github.com/pkg/errors v0.8.1 github.com/spf13/cobra v1.1.3 - golang.org/x/net v0.0.0-20210510120150-4163338589ed + golang.org/x/net v0.7.0 google.golang.org/grpc v1.37.1 + k8s.io/api v0.17.0 + k8s.io/apimachinery v0.17.1-beta.0 + k8s.io/client-go v0.17.0 k8s.io/mount-utils v0.21.0 ) diff --git a/integration/docker/csi/go.sum b/integration/docker/csi/go.sum index ad980d43f551..276577ab9d63 100644 --- a/integration/docker/csi/go.sum +++ b/integration/docker/csi/go.sum @@ -80,6 +80,7 @@ github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dp github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v0.0.0-20210429001901-424d2337a529 h1:2voWjNECnrZRbfwXxHB1/j8wa6xdKn85B5NzgVL/pTU= @@ -110,6 +111,7 @@ github.com/google/go-cmp v0.4.0/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= +github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -120,6 +122,7 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= +github.com/googleapis/gnostic v0.2.0 h1:l6N3VoaVzTncYYW+9yOz2LJJammFZGBO13sqgEhpy9g= github.com/googleapis/gnostic v0.2.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -149,12 +152,14 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= github.com/imdario/mergo 
v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.8 h1:QiWkFLKq0T7mpzwOTu6BzNDbfTE8OLrYhVKYMLF46Ok= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -188,9 +193,11 @@ github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0Qu github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= github.com/modern-go/reflect2 
v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= @@ -208,6 +215,7 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -260,6 +268,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -273,6 +282,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -291,6 +302,7 @@ golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -309,16 +321,20 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net 
v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210510120150-4163338589ed h1:p9UgmWI9wKpfYmgaV/IZKGdXc5qEK45tDwwwDyjS26I= -golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -338,16 +354,25 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220220014-0732a990476f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.6 
h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -369,6 +394,8 @@ golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -380,6 +407,7 @@ google.golang.org/api v0.13.0/go.mod 
h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -419,6 +447,7 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8X gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -427,6 +456,7 @@ gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bl gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -435,9 +465,12 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +k8s.io/api v0.17.0 h1:H9d/lw+VkZKEVIUc8F3wgiQ+FUXTTr21M87jXLU7yqM= k8s.io/api v0.17.0/go.mod h1:npsyOePkeP0CPwyGfXDHxvypiYMJxBWAMpQxCaJ4ZxI= k8s.io/apimachinery v0.17.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= +k8s.io/apimachinery v0.17.1-beta.0 h1:0Wl/KpAiFOMe9to5h8x2Y6JnjV+BEWJiTcUk1Vx7zdE= k8s.io/apimachinery v0.17.1-beta.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= +k8s.io/client-go v0.17.0 h1:8QOGvUGdqDMFrm9sD6IUFl256BcffynGoe80sxgTEDg= k8s.io/client-go v0.17.0/go.mod h1:TYgR6EUHs6k45hb6KWjVD6jFZvJV4gHDikv/It0xz+k= k8s.io/component-base v0.17.0/go.mod h1:rKuRAokNMY2nn2A6LP/MiwpoaMRHpfRnrPaUJJj1Yoc= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= @@ -456,4 +489,5 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= +sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= From 492cc7c8abc9212fa90cba4dd0c6efcf0bb60ab5 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Mon, 27 Feb 2023 16:49:10 -0800 Subject: 
[PATCH 146/334] Respect MaxRamPercentage in container Currently if user specifies `MaxRamPercentage` without specifying `Xmx`, Alluxio still gives a defualt `Xmx` which will override the `MaxRamPercentage` because of priority. Fix this by if user specifies `MaxRamPercentage` we don't give a default `Xmx`. First step of solving https://github.com/Alluxio/alluxio/issues/10083 Replace https://github.com/Alluxio/alluxio/pull/15819 pr-link: Alluxio/alluxio#16940 change-id: cid-8f52b90408f6d3a2ba8ee0cf6c623fd85e6b9e15 --- bin/launch-process | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/bin/launch-process b/bin/launch-process index cc9c8f75dd24..2112406b3dc7 100755 --- a/bin/launch-process +++ b/bin/launch-process @@ -49,8 +49,9 @@ USAGE+=" contains() { if [[ "$1" = *"$2"* ]]; then printf "1" + else + printf "0" fi - printf "0" } # Sets environment variables by sourcing ${ALLUXIO_HOME}/libexec/alluxio-config.sh @@ -124,8 +125,9 @@ launch_master() { fi # use a default Xmx value for the master - local res="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}")" + local contain_max_percentage="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_MASTER_JAVA_OPTS+=" -Xmx8g " fi # use a default MetaspaceSize value for the master @@ -142,8 +144,9 @@ launch_master() { # Launch a secondary master process launch_secondary_master() { # use a default Xmx value for the master - local res="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}")" + local contain_max_percentage="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then 
ALLUXIO_SECONDARY_MASTER_JAVA_OPTS+=" -Xmx8g " fi launch_process "${ALLUXIO_SECONDARY_MASTER_ATTACH_OPTS}" \ @@ -161,8 +164,9 @@ launch_job_master() { # Launch a worker process launch_worker() { # use a default Xmx value for the worker - local res="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "Xmx")" - if [[ "${res}" -eq "0" ]]; then + local contain_xmx="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}")" + local contain_max_percentage="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "MaxRAMPercentage")" + if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_WORKER_JAVA_OPTS+=" -Xmx4g " fi From 02bc22c0a24122ad6f5c15f1653e920bb4d2b161 Mon Sep 17 00:00:00 2001 From: Beinan Date: Mon, 27 Feb 2023 16:50:46 -0800 Subject: [PATCH 147/334] Remove the restriction of UFS for local cache ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#16819 change-id: cid-66e83e07bfcb549398dc45942b475a431d777aa7 --- .../java/alluxio/hadoop/LocalCacheFileSystem.java | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java index 68d841d6ce4d..3781a3b1cfe2 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java @@ -15,7 +15,6 @@ import static java.nio.charset.StandardCharsets.UTF_8; import alluxio.AlluxioURI; -import alluxio.Constants; import alluxio.client.file.CacheContext; import alluxio.client.file.URIStatus; import alluxio.client.file.cache.CacheManager; @@ -39,10 +38,8 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; -import java.util.HashSet; import java.util.Map; import java.util.Properties; -import java.util.Set; /** * An Alluxio client compatible with Apache Hadoop {@link org.apache.hadoop.fs.FileSystem} @@ -51,12 +48,6 @@ */ public class LocalCacheFileSystem extends org.apache.hadoop.fs.FileSystem { private static final Logger LOG = LoggerFactory.getLogger(LocalCacheFileSystem.class); - private static final Set SUPPORTED_FS = new HashSet() { - { - add(Constants.SCHEME); - add("ws"); - } - }; /** The external Hadoop filesystem to query on cache miss. 
*/ private final org.apache.hadoop.fs.FileSystem mExternalFileSystem; @@ -88,10 +79,6 @@ public LocalCacheFileSystem(org.apache.hadoop.fs.FileSystem fileSystem, @Override public synchronized void initialize(URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException { - if (!SUPPORTED_FS.contains(uri.getScheme())) { - throw new UnsupportedOperationException( - uri.getScheme() + " is not supported as the external filesystem."); - } super.initialize(uri, conf); mHadoopConf = conf; // Set statistics From fb9e88d86289aedcd49b802e7fc5c6fea1e27924 Mon Sep 17 00:00:00 2001 From: linda <39544641+wenfang6@users.noreply.github.com> Date: Tue, 28 Feb 2023 23:43:31 +0800 Subject: [PATCH 148/334] Support display master system status from master ui ### What changes are proposed in this pull request? This pr support display master system status in master's web ui. as below. Snipaste_2023-01-17_10-30-54 ### Why are the changes needed? Sometimes, we want to know cluster info from web ui, and system status is important to check if master is healthy. so it's more convenient for users to know master health status on web ui. ### Does this PR introduce any user facing changes? 
no pr-link: Alluxio/alluxio#16779 change-id: cid-138f10111eaa5bfa6b13a7919051f510b94d74fd --- .../alluxio/wire/MasterWebUIOverview.java | 22 +++++++++++++++++++ .../meta/AlluxioMasterRestServiceHandler.java | 9 ++++++++ .../containers/pages/Overview/Overview.tsx | 4 ++++ .../__snapshots__/Overview.test.tsx.snap | 8 +++++++ webui/master/src/store/overview/reducer.tsx | 1 + webui/master/src/store/overview/types.tsx | 1 + 6 files changed, 45 insertions(+) diff --git a/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java b/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java index d7d594908310..28861a8bf570 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIOverview.java @@ -56,6 +56,7 @@ public final class MasterWebUIOverview implements Serializable { private String mRevision; private String mMasterRole; private String mLeaderId; + private String mSystemStatus; /** * Creates a new instance of {@link MasterWebUIOverview}. @@ -302,6 +303,15 @@ public String getLeaderId() { return mLeaderId; } + /** + * Gets system status. + * + * @return the system status + */ + public String getSystemStatus() { + return mSystemStatus; + } + /** * Sets capacity. * @@ -598,6 +608,17 @@ public MasterWebUIOverview setLeaderId(String leaderId) { return this; } + /** + * Sets the system status. 
+ * + * @param systemStatus the system status + * @return the master status system + */ + public MasterWebUIOverview setSystemStatus(String systemStatus) { + mSystemStatus = systemStatus; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("capacity", mCapacity) @@ -616,6 +637,7 @@ public String toString() { .add("uptime", mUptime).add("usedCapacity", mUsedCapacity) .add("version", mVersion).add("revision", mRevision) .add("leaderId", mLeaderId) + .add("systemStatus", mSystemStatus) .add("masterRole", mMasterRole) .toString(); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index 6d50c07dfe0a..9176a401060a 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -41,6 +41,7 @@ import alluxio.master.file.FileSystemMaster; import alluxio.master.file.contexts.ListStatusContext; import alluxio.master.file.meta.MountTable; +import alluxio.master.throttle.SystemMonitor.SystemStatus; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.security.authentication.AuthenticatedClientUser; @@ -373,6 +374,14 @@ public Response getWebUIOverview() { if (leaderIdGauge != null) { response.setLeaderId((String) leaderIdGauge.getValue()); } + // Add master system status + Gauge systemStatusGauge = MetricsSystem.METRIC_REGISTRY.getGauges() + .get("Master.system.status"); + if (systemStatusGauge != null) { + SystemStatus systemStatus = (SystemStatus) systemStatusGauge.getValue(); + response.setSystemStatus(systemStatus.toString()); + } + return response; }, Configuration.global()); } diff --git a/webui/master/src/containers/pages/Overview/Overview.tsx b/webui/master/src/containers/pages/Overview/Overview.tsx index 
70de938aa346..38a836c2eb49 100644 --- a/webui/master/src/containers/pages/Overview/Overview.tsx +++ b/webui/master/src/containers/pages/Overview/Overview.tsx @@ -81,6 +81,10 @@ export class OverviewPresenter extends React.Component { LeaderId {data.leaderId} + + System Status + {data.systemStatus} + {this.renderConfigurationIssues(data.configCheckErrors, 'text-error')} {this.renderConfigurationIssues(data.configCheckWarns, 'text-warning')} {this.renderJournalDiskWarnings(data.journalDiskWarnings, 'text-warning')} diff --git a/webui/master/src/containers/pages/Overview/__snapshots__/Overview.test.tsx.snap b/webui/master/src/containers/pages/Overview/__snapshots__/Overview.test.tsx.snap index 51ed95ca562d..1b589eb071ca 100644 --- a/webui/master/src/containers/pages/Overview/__snapshots__/Overview.test.tsx.snap +++ b/webui/master/src/containers/pages/Overview/__snapshots__/Overview.test.tsx.snap @@ -98,6 +98,14 @@ exports[`Overview Shallow component Matches snapshot 1`] = ` + + + System Status + + + diff --git a/webui/master/src/store/overview/reducer.tsx b/webui/master/src/store/overview/reducer.tsx index fa2d7d1ad645..b2b2d6446360 100644 --- a/webui/master/src/store/overview/reducer.tsx +++ b/webui/master/src/store/overview/reducer.tsx @@ -28,6 +28,7 @@ export const initialOverviewState: IOverviewState = { journalCheckpointTimeWarning: '', journalDiskWarnings: [], leaderId: '', + systemStatus: '', liveWorkerNodes: 0, masterNodeAddress: '', replicaBlockCount: '', diff --git a/webui/master/src/store/overview/types.tsx b/webui/master/src/store/overview/types.tsx index 3943589f32f3..0e84e0e26187 100644 --- a/webui/master/src/store/overview/types.tsx +++ b/webui/master/src/store/overview/types.tsx @@ -27,6 +27,7 @@ export interface IOverview { journalCheckpointTimeWarning: string; journalDiskWarnings: string[]; leaderId: string; + systemStatus: string; liveWorkerNodes: number; masterNodeAddress: string; replicaBlockCount: string; From 
8502fbf3bb4bc51431545db5d54d588a033b721e Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Wed, 1 Mar 2023 00:25:19 +0800 Subject: [PATCH 149/334] Display more information in WebUI Masters ### What changes are proposed in this pull request? Add more information in WebUI Masters: 1. Start time. 2. Elected / Step-down time. 3. Last checkpoint time. 4. Number of journal entries since checkpoint. 5. Build version. Add fields in meta master RPC: 1. RegisterMasterPOptions * start time * primacy change time * version * revision 2. MasterHeartbeatPOptions * last checkpoint time * journal entries since checkpoint Add metrics keys: 1. Master process start time. 2. Last gain primacy time. 3. Last lose primacy time. `MasterInfo` and WebUI pages are changed accordingly. ### Why are the changes needed? Make Masters tab in WebUI more informative. Fix: #16709 ### Does this PR introduce any user facing changes? WebUI Masters Fixes https://github.com/Alluxio/alluxio/issues/8765 pr-link: Alluxio/alluxio#16636 change-id: cid-ce162c4a6b2341b974daaa8f0823791ed5c032b2 --- .../main/java/alluxio/metrics/MetricKey.java | 15 ++ .../java/alluxio/util/webui/UIMasterInfo.java | 63 ----- .../main/java/alluxio/wire/MasterInfo.java | 235 +++++++++++++++--- .../java/alluxio/wire/MasterWebUIMasters.java | 68 ++--- .../java/alluxio/wire/MasterInfoTest.java | 30 ++- .../alluxio/master/AlluxioMasterProcess.java | 14 ++ .../java/alluxio/master/MasterProcess.java | 2 + .../meta/AlluxioMasterRestServiceHandler.java | 48 ++-- .../master/meta/DefaultMetaMaster.java | 55 ++-- .../java/alluxio/master/meta/MasterInfo.java | 100 +++++++- .../java/alluxio/master/meta/MetaMaster.java | 15 +- .../meta/MetaMasterMasterServiceHandler.java | 4 +- .../RetryHandlingMetaMasterMasterClient.java | 38 ++- .../src/main/proto/grpc/meta_master.proto | 9 +- core/transport/src/main/proto/proto.lock | 36 ++- .../src/constants/types/IMasterInfo.tsx | 9 +- .../src/containers/pages/Masters/Masters.tsx | 62 ++++- 
.../__snapshots__/Masters.test.tsx.snap | 76 +++++- webui/master/src/store/masters/reducer.tsx | 15 +- webui/master/src/store/masters/types.tsx | 6 +- 20 files changed, 694 insertions(+), 206 deletions(-) delete mode 100644 core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index db8ccb5f429d..477a8250da90 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -909,6 +909,21 @@ public static String getSyncMetricName(long mountId) { .setDescription("Display master role id") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_START_TIME = + new Builder("Master.StartTime") + .setDescription("The start time of the master process") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_LAST_GAIN_PRIMACY_TIME = + new Builder("Master.LastGainPrimacyTime") + .setDescription("Last time the master gains primacy") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_LAST_LOSE_PRIMACY_TIME = + new Builder("Master.LastLosePrimacyTime") + .setDescription("Last time the master loses primacy") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_JOURNAL_FLUSH_FAILURE = new Builder("Master.JournalFlushFailure") .setDescription("Total number of failed journal flush") diff --git a/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java b/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java deleted file mode 100644 index e391b4a3b53e..000000000000 --- a/core/common/src/main/java/alluxio/util/webui/UIMasterInfo.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). 
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.util.webui; - -import alluxio.util.CommonUtils; - -/** - * Displays information about a master in the UI. - */ -public class UIMasterInfo { - private final String mMasterAddress; - private final long mId; - private final long mLastUpdatedTimeMs; - - /** - * Creates a new instance of {@link UIMasterInfo}. - * - * @param masterAddress The master address - * @param id The master id - * @param lastUpdatedTimeMs The last heart beat in ms - */ - public UIMasterInfo(String masterAddress, long id, long lastUpdatedTimeMs) { - mMasterAddress = masterAddress; - mId = id; - mLastUpdatedTimeMs = lastUpdatedTimeMs; - } - - /** - * Gets master address. - * - * @return the master address - */ - public String getAddress() { - return mMasterAddress; - } - - /** - * Get id. - * - * @return the id - */ - public String getId() { - return Long.toString(mId); - } - - /** - * Get master last update time. 
- * - * @return the master last update time - */ - public String getLastUpdatedTime() { - return CommonUtils.convertMsToClockTime(mLastUpdatedTimeMs); - } -} diff --git a/core/common/src/main/java/alluxio/wire/MasterInfo.java b/core/common/src/main/java/alluxio/wire/MasterInfo.java index c3df43fe5d09..39485ec376a7 100644 --- a/core/common/src/main/java/alluxio/wire/MasterInfo.java +++ b/core/common/src/main/java/alluxio/wire/MasterInfo.java @@ -11,6 +11,8 @@ package alluxio.wire; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.util.CommonUtils; import com.google.common.base.MoreObjects; @@ -24,31 +26,33 @@ */ @NotThreadSafe public final class MasterInfo { + private static final String NONE = "N/A"; /** Master's address. */ private Address mAddress; /** The id of the master. */ private long mId; - /** Master's last updated time in ms. */ - private long mLastUpdatedTimeMs; + /** Master's start time. */ + private String mStartTime = NONE; + /** Master's last gain primacy time. */ + private String mGainPrimacyTime = NONE; + /** Master's last lose primacy time. */ + private String mLosePrimacyTime = NONE; + /** Master's last updated time. */ + private String mLastUpdatedTime = NONE; + /** Master's version. */ + private String mVersion = NONE; + /** Master's revision. */ + private String mRevision = NONE; + /** Master's last checkpoint time. */ + private String mLastCheckpointTime = NONE; + /** Master's journal entries since last checkpoint. */ + private long mJournalEntriesSinceCheckpoint = 0; /** * Creates a new instance of {@link MasterInfo}. */ public MasterInfo() {} - /** - * Creates a new instance of {@link MasterInfo}. 
- * - * @param id the master id to use - * @param address the master address to use - * @param lastUpdatedTimeMs the master lastUpdatedTimeMs to use - */ - public MasterInfo(long id, Address address, long lastUpdatedTimeMs) { - mAddress = Preconditions.checkNotNull(address, "address"); - mId = id; - mLastUpdatedTimeMs = lastUpdatedTimeMs; - } - /** * Creates a new instance of {@link MasterInfo}. * @@ -58,7 +62,6 @@ public MasterInfo(long id, Address address, long lastUpdatedTimeMs) { public MasterInfo(long id, Address address) { mAddress = Preconditions.checkNotNull(address, "address"); mId = id; - mLastUpdatedTimeMs = System.currentTimeMillis(); } /** @@ -76,10 +79,59 @@ public long getId() { } /** - * @return the last updated time of the master in ms + * @return the last updated time of the master + */ + public String getLastUpdatedTime() { + return mLastUpdatedTime; + } + + /** + * @return the start time of the master */ - public long getLastUpdatedTimeMs() { - return mLastUpdatedTimeMs; + public String getStartTime() { + return mStartTime; + } + + /** + * @return the last gain primacy time of the master + */ + public String getGainPrimacyTime() { + return mGainPrimacyTime; + } + + /** + * @return the last lose primacy time of the master + */ + public String getLosePrimacyTime() { + return mLosePrimacyTime; + } + + /** + * @return the version of the master + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the master + */ + public String getRevision() { + return mRevision; + } + + /** + * @return the last checkpoint time + */ + public String getLastCheckpointTime() { + return mLastCheckpointTime; + } + + /** + * @return journal entries since last checkpoint + */ + public long getJournalEntriesSinceCheckpoint() { + return mJournalEntriesSinceCheckpoint; } /** @@ -101,26 +153,128 @@ public MasterInfo setId(long id) { } /** - * @param lastUpdatedTimeMs the last update time in ms + * @param lastUpdatedTime the last update 
time * @return the master information */ - public MasterInfo setLastUpdatedTimeMs(long lastUpdatedTimeMs) { - mLastUpdatedTimeMs = lastUpdatedTimeMs; + public MasterInfo setLastUpdatedTime(String lastUpdatedTime) { + mLastUpdatedTime = lastUpdatedTime; return this; } - @Override - public String toString() { - return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) - .add("lastUpdatedTime", CommonUtils.convertMsToClockTime(mLastUpdatedTimeMs)) - .toString(); + /** + * @param lastUpdatedTime the last update time in ms + * @return the master information + */ + public MasterInfo setLastUpdatedTimeMs(long lastUpdatedTime) { + return this.setLastUpdatedTime(convertMsToDate(lastUpdatedTime)); + } + + /** + * @param startTime the start time of the master + * @return the master information + */ + public MasterInfo setStartTime(String startTime) { + mStartTime = startTime; + return this; + } + + /** + * @param startTime the start time of the master in ms + * @return the master information + */ + public MasterInfo setStartTimeMs(long startTime) { + return this.setStartTime(convertMsToDate(startTime)); } /** - * Updates the last updated time of the master (in milliseconds). 
+ * @param gainPrimacyTime the last gain primacy time of the master + * @return the master information + */ + public MasterInfo setGainPrimacyTime(String gainPrimacyTime) { + mGainPrimacyTime = gainPrimacyTime; + return this; + } + + /** + * @param gainPrimacyTimeMs the last gain primacy time of the master in ms + * @return the master information + */ + public MasterInfo setGainPrimacyTimeMs(long gainPrimacyTimeMs) { + return this.setGainPrimacyTime(convertMsToDate(gainPrimacyTimeMs)); + } + + /** + * @param losePrimacyTime the last lose primacy time of the master + * @return the master information + */ + public MasterInfo setLosePrimacyTime(String losePrimacyTime) { + mLosePrimacyTime = losePrimacyTime; + return this; + } + + /** + * @param losePrimacyTimeMs the last lose primacy time of the master in ms + * @return the master information + */ + public MasterInfo setLosePrimacyTimeMs(long losePrimacyTimeMs) { + return this.setLosePrimacyTime(convertMsToDate(losePrimacyTimeMs)); + } + + /** + * @param version the version of the master + * @return the master information */ - public void updateLastUpdatedTimeMs() { - mLastUpdatedTimeMs = System.currentTimeMillis(); + public MasterInfo setVersion(String version) { + mVersion = version; + return this; + } + + /** + * @param revision the revision of the master + * @return the master information + */ + public MasterInfo setRevision(String revision) { + mRevision = revision; + return this; + } + + /** + * @param lastCheckpointTime the last checkpoint time + * @return the master information + */ + public MasterInfo setLastCheckpointTime(String lastCheckpointTime) { + mLastCheckpointTime = lastCheckpointTime; + return this; + } + + /** + * @param lastCheckpointTime the last checkpoint time in ms + * @return the master information + */ + public MasterInfo setLastCheckpointTimeMs(long lastCheckpointTime) { + return this.setLastCheckpointTime(convertMsToDate(lastCheckpointTime)); + } + + /** + * @param 
journalEntriesSinceCheckpoint journal entries since last checkpoint + * @return the master information + */ + public MasterInfo setJournalEntriesSinceCheckpoint(long journalEntriesSinceCheckpoint) { + mJournalEntriesSinceCheckpoint = journalEntriesSinceCheckpoint; + return this; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) + .add("lastUpdatedTime", mLastUpdatedTime) + .add("startTime", mStartTime) + .add("gainPrimacyTime", mGainPrimacyTime) + .add("losePrimacyTime", mLosePrimacyTime) + .add("lastCheckpointTime", mLastCheckpointTime) + .add("journalEntriesSinceCheckpoint", mJournalEntriesSinceCheckpoint) + .add("version", mVersion) + .add("revision", mRevision).toString(); } @Override @@ -133,11 +287,28 @@ public boolean equals(Object o) { } MasterInfo that = (MasterInfo) o; return mId == that.mId && Objects.equal(mAddress, that.mAddress) - && mLastUpdatedTimeMs == that.mLastUpdatedTimeMs; + && mLastUpdatedTime.equals(that.mLastUpdatedTime) + && mStartTime.equals(that.mStartTime) + && mGainPrimacyTime.equals(that.mGainPrimacyTime) + && mLosePrimacyTime.equals(that.mLosePrimacyTime) + && mLastCheckpointTime.equals(that.mLastCheckpointTime) + && mJournalEntriesSinceCheckpoint == that.mJournalEntriesSinceCheckpoint + && mVersion.equals(that.mVersion) + && mRevision.equals(that.mRevision); } @Override public int hashCode() { - return Objects.hashCode(mId, mAddress, mLastUpdatedTimeMs); + return Objects.hashCode(mId, mAddress, mLastUpdatedTime, mStartTime, mGainPrimacyTime, + mLosePrimacyTime, mLastCheckpointTime, mJournalEntriesSinceCheckpoint, + mVersion, mRevision); + } + + private static String convertMsToDate(long timeMs) { + if (timeMs <= 0) { + return NONE; + } + return CommonUtils.convertMsToDate(timeMs, + Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); } } diff --git a/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java 
b/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java index fac2ada17a25..4e88b2e96523 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIMasters.java @@ -24,9 +24,9 @@ public final class MasterWebUIMasters implements Serializable { private static final long serialVersionUID = -2709466215687255197L; private boolean mDebug; - private MasterInfo[] mFailedMasterInfos; - private MasterInfo[] mNormalMasterInfos; - private MasterInfo mLeaderMasterInfo; + private MasterInfo[] mLostMasterInfos; + private MasterInfo[] mStandbyMasterInfos; + private MasterInfo mPrimaryMasterInfo; /** * Creates a new instance of {@link MasterWebUIMasters}. @@ -44,37 +44,37 @@ public boolean getDebug() { } /** - * Get failed master infos master info [ ]. + * Get info of lost masters. * - * @return the master info [ ] + * @return an array of lost {@link MasterInfo} */ - public MasterInfo[] getFailedMasterInfos() { - return mFailedMasterInfos; + public MasterInfo[] getLostMasterInfos() { + return mLostMasterInfos; } /** - * Get leader master info master info. + * Get info of standby masters. * - * @return the master info + * @return an array of standby {@link MasterInfo} */ - public MasterInfo[] getNormalMasterInfos() { - return mNormalMasterInfos; + public MasterInfo[] getStandbyMasterInfos() { + return mStandbyMasterInfos; } /** - * Get normal master infos master info [ ]. + * Get info of the primary master. * - * @return the master info [ ] + * @return the primary {@link MasterInfo} */ - public MasterInfo getLeaderMasterInfo() { - return mLeaderMasterInfo; + public MasterInfo getPrimaryMasterInfo() { + return mPrimaryMasterInfo; } /** * Sets debug. 
* * @param debug the debug - * @return the debug master infos + * @return the {@link MasterWebUIMasters} instance */ public MasterWebUIMasters setDebug(boolean debug) { mDebug = debug; @@ -82,43 +82,43 @@ public MasterWebUIMasters setDebug(boolean debug) { } /** - * Sets failed master infos. + * Sets lost master infos. * - * @param failedMasterInfos the failed master infos - * @return the failed master infos + * @param lostMasterInfos an array of lost {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setFailedMasterInfos(MasterInfo[] failedMasterInfos) { - mFailedMasterInfos = failedMasterInfos.clone(); + public MasterWebUIMasters setLostMasterInfos(MasterInfo[] lostMasterInfos) { + mLostMasterInfos = lostMasterInfos.clone(); return this; } /** - * Sets normal master infos. + * Sets standby master infos. * - * @param normalMasterInfos the normal master infos - * @return the normal master infos + * @param standbyMasterInfos an array of standby {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setNormalMasterInfos(MasterInfo[] normalMasterInfos) { - mNormalMasterInfos = normalMasterInfos.clone(); + public MasterWebUIMasters setStandbyMasterInfos(MasterInfo[] standbyMasterInfos) { + mStandbyMasterInfos = standbyMasterInfos.clone(); return this; } /** - * Sets leader master info. + * Sets primary master info. 
* - * @param leaderMasterInfo the normal master info - * @return the leader master info + * @param primaryMasterInfo the primary {@link MasterInfo} + * @return the {@link MasterWebUIMasters} instance */ - public MasterWebUIMasters setLeaderMasterInfo(MasterInfo leaderMasterInfo) { - mLeaderMasterInfo = leaderMasterInfo; + public MasterWebUIMasters setPrimaryMasterInfo(MasterInfo primaryMasterInfo) { + mPrimaryMasterInfo = primaryMasterInfo; return this; } @Override public String toString() { return MoreObjects.toStringHelper(this).add("debug", mDebug) - .add("failedMasterInfos", mFailedMasterInfos) - .add("normalMasterInfos", mNormalMasterInfos) - .add("leaderMasterInfo", mLeaderMasterInfo).toString(); + .add("lostMasterInfos", mLostMasterInfos) + .add("standbyMasterInfos", mStandbyMasterInfos) + .add("primaryMasterInfo", mPrimaryMasterInfo).toString(); } } diff --git a/core/common/src/test/java/alluxio/wire/MasterInfoTest.java b/core/common/src/test/java/alluxio/wire/MasterInfoTest.java index 51b41f6cc1ca..8c47a594070b 100644 --- a/core/common/src/test/java/alluxio/wire/MasterInfoTest.java +++ b/core/common/src/test/java/alluxio/wire/MasterInfoTest.java @@ -11,7 +11,10 @@ package alluxio.wire; +import alluxio.util.CommonUtils; + import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang3.RandomStringUtils; import org.junit.Assert; import org.junit.Test; @@ -32,7 +35,14 @@ public void json() throws Exception { public void checkEquality(MasterInfo a, MasterInfo b) { Assert.assertEquals(a.getId(), b.getId()); Assert.assertEquals(a.getAddress(), b.getAddress()); - Assert.assertEquals(a.getLastUpdatedTimeMs(), b.getLastUpdatedTimeMs()); + Assert.assertEquals(a.getLastUpdatedTime(), b.getLastUpdatedTime()); + Assert.assertEquals(a.getStartTime(), b.getStartTime()); + Assert.assertEquals(a.getGainPrimacyTime(), b.getGainPrimacyTime()); + Assert.assertEquals(a.getLosePrimacyTime(), 
b.getLosePrimacyTime()); + Assert.assertEquals(a.getLastCheckpointTime(), b.getLastCheckpointTime()); + Assert.assertEquals(a.getJournalEntriesSinceCheckpoint(), b.getJournalEntriesSinceCheckpoint()); + Assert.assertEquals(a.getVersion(), b.getVersion()); + Assert.assertEquals(a.getRevision(), b.getRevision()); Assert.assertEquals(a, b); } @@ -40,9 +50,25 @@ public static MasterInfo createRandom() { Random random = new Random(); long id = random.nextLong(); Address address = new Address(RandomStringUtils.randomAlphanumeric(10), random.nextInt()); + long lastUpdatedTimeMs = CommonUtils.getCurrentMs(); + long gainPrimacyTimeMs = lastUpdatedTimeMs - random.nextInt(10000); + long losePrimacyTimeMs = lastUpdatedTimeMs - random.nextInt(10000); + long startTimeMs = gainPrimacyTimeMs - random.nextInt(10000); + String version = String.format("%d.%d.%d", random.nextInt(10), + random.nextInt(20), random.nextInt(10)); + String revision = DigestUtils.sha1Hex(RandomStringUtils.random(10)); + long lastCheckpointTime = startTimeMs + (lastUpdatedTimeMs - startTimeMs) / 2; + long journalEntriesSinceCheckpoint = random.nextInt(1000); MasterInfo result = new MasterInfo(id, address); - result.updateLastUpdatedTimeMs(); + result.setLastUpdatedTimeMs(lastUpdatedTimeMs); + result.setStartTimeMs(startTimeMs); + result.setGainPrimacyTimeMs(gainPrimacyTimeMs); + result.setLosePrimacyTimeMs(losePrimacyTimeMs); + result.setLastCheckpointTimeMs(lastCheckpointTime); + result.setJournalEntriesSinceCheckpoint(journalEntriesSinceCheckpoint); + result.setVersion(version); + result.setRevision(revision); return result; } } diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java index 95e6a21fad20..2e71e0ccc4fa 100644 --- a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java @@ -92,6 +92,12 @@ 
public class AlluxioMasterProcess extends MasterProcess { /** See {@link #isRunning()}. */ private volatile boolean mRunning = false; + /** last time this process gain primacy in ms. */ + private volatile long mLastGainPrimacyTime = 0; + + /** last time this process lose primacy in ms. */ + private volatile long mLastLosePrimacyTime = 0; + /** * Creates a new {@link AlluxioMasterProcess}. */ @@ -107,6 +113,12 @@ public class AlluxioMasterProcess extends MasterProcess { if (Configuration.getBoolean(PropertyKey.MASTER_THROTTLE_ENABLED)) { mRegistry.get(alluxio.master.throttle.DefaultThrottleMaster.class).setMaster(this); } + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_LAST_GAIN_PRIMACY_TIME.getName(), + () -> mLastGainPrimacyTime); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_LAST_LOSE_PRIMACY_TIME.getName(), + () -> mLastLosePrimacyTime); LOG.info("New process created."); } @@ -208,6 +220,7 @@ public void start() throws Exception { LOG.info("Started in stand-by mode."); mLeaderSelector.waitForState(NodeState.PRIMARY); + mLastGainPrimacyTime = CommonUtils.getCurrentMs(); if (!mRunning) { break; } @@ -224,6 +237,7 @@ public void start() throws Exception { throw t; } mLeaderSelector.waitForState(NodeState.STANDBY); + mLastLosePrimacyTime = CommonUtils.getCurrentMs(); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_EXIT_ON_DEMOTION)) { stop(); } else { diff --git a/core/server/master/src/main/java/alluxio/master/MasterProcess.java b/core/server/master/src/main/java/alluxio/master/MasterProcess.java index a0a23badb789..06e12e446fe8 100644 --- a/core/server/master/src/main/java/alluxio/master/MasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/MasterProcess.java @@ -21,6 +21,7 @@ import alluxio.master.service.SimpleService; import alluxio.master.service.rpc.RpcServerService; import alluxio.master.service.web.WebServerService; +import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import 
alluxio.util.CommonUtils; import alluxio.util.ConfigurationUtils; @@ -90,6 +91,7 @@ public MasterProcess(JournalSystem journalSystem, PrimarySelector leaderSelector mRpcConnectAddress = NetworkAddressUtils.getConnectAddress(rpcService, Configuration.global()); mWebConnectAddress = NetworkAddressUtils.getConnectAddress(webService, Configuration.global()); mStartTimeMs = System.currentTimeMillis(); + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_START_TIME.getName(), () -> mStartTimeMs); } private static InetSocketAddress configureAddress(ServiceType service) { diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index 9176a401060a..124677587af4 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -59,7 +59,6 @@ import alluxio.util.webui.UIFileInfo; import alluxio.util.webui.WebUtils; import alluxio.web.MasterWebServer; -import alluxio.wire.Address; import alluxio.wire.AlluxioMasterInfo; import alluxio.wire.BlockLocation; import alluxio.wire.Capacity; @@ -100,7 +99,6 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; -import java.net.InetSocketAddress; import java.net.URLDecoder; import java.time.Instant; import java.time.ZoneOffset; @@ -875,24 +873,34 @@ public Response getWebUIWorkers() { @GET @Path(WEBUI_MASTERS) public Response getWebUIMasters() { - return RestUtils.call(() -> { - MasterWebUIMasters response = new MasterWebUIMasters(); - - response.setDebug(Configuration.getBoolean(PropertyKey.DEBUG)); - - MasterInfo[] failedMasterInfos = mMetaMaster.getLostMasterInfos(); - response.setFailedMasterInfos(failedMasterInfos); - - MasterInfo[] normalMasterInfos = mMetaMaster.getMasterInfos(); - 
response.setNormalMasterInfos(normalMasterInfos); - - InetSocketAddress leaderMasterAddress = mMasterProcess.getRpcAddress(); - MasterInfo leaderMasterInfo = new MasterInfo(MASTER_ID_NULL, - new Address(leaderMasterAddress.getHostString(), leaderMasterAddress.getPort()), - System.currentTimeMillis()); - response.setLeaderMasterInfo(leaderMasterInfo); - return response; - }, Configuration.global()); + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + Gauge lastCheckpointGauge = gauges + .get(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); + long lastCheckpointTime = lastCheckpointGauge == null ? 0 + : (long) lastCheckpointGauge.getValue(); + Gauge journalEntriesGauge = gauges + .get(MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName()); + long journalEntriesSinceCheckpoint = journalEntriesGauge == null ? 0 + : (long) journalEntriesGauge.getValue(); + + Gauge lastGainPrimacyGuage = gauges + .get(MetricKey.MASTER_LAST_GAIN_PRIMACY_TIME.getName()); + long lastGainPrimacyTime = lastGainPrimacyGuage == null ? 
0 + : (long) lastGainPrimacyGuage.getValue(); + + return RestUtils.call(() -> new MasterWebUIMasters() + .setDebug(Configuration.getBoolean(PropertyKey.DEBUG)) + .setLostMasterInfos(mMetaMaster.getLostMasterInfos()) + .setStandbyMasterInfos(mMetaMaster.getStandbyMasterInfos()) + .setPrimaryMasterInfo(new MasterInfo(MASTER_ID_NULL, mMetaMaster.getMasterAddress()) + .setLastUpdatedTimeMs(System.currentTimeMillis()) + .setStartTimeMs(mMasterProcess.getStartTimeMs()) + .setGainPrimacyTimeMs(lastGainPrimacyTime) + .setLastCheckpointTimeMs(lastCheckpointTime) + .setJournalEntriesSinceCheckpoint(journalEntriesSinceCheckpoint) + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION)), + Configuration.global()); } /** diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index d6b09d746724..42f966c60bde 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -30,6 +30,7 @@ import alluxio.grpc.BackupStatusPRequest; import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.GrpcService; +import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; import alluxio.grpc.RegisterMasterPOptions; import alluxio.grpc.Scope; @@ -487,6 +488,11 @@ public boolean getNewerVersionAvailable() { return mNewerVersionAvailable; } + @Override + public Address getMasterAddress() { + return mMasterAddress; + } + @Override public List
getMasterAddresses() { return mMasterConfigStore.getLiveNodeAddresses(); @@ -498,24 +504,27 @@ public List
getWorkerAddresses() { } @Override - public alluxio.wire.MasterInfo[] getMasterInfos() { - alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[mMasters.size()]; - int indexNum = 0; - for (MasterInfo master : mMasters) { - masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), - master.getAddress(), master.getLastUpdatedTimeMs()); - indexNum++; - } - return masterInfos; + public alluxio.wire.MasterInfo[] getStandbyMasterInfos() { + return toWire(mMasters); } @Override public alluxio.wire.MasterInfo[] getLostMasterInfos() { - alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[mLostMasters.size()]; + return toWire(mLostMasters); + } + + private static alluxio.wire.MasterInfo[] toWire(final IndexedSet masters) { + alluxio.wire.MasterInfo[] masterInfos = new alluxio.wire.MasterInfo[masters.size()]; int indexNum = 0; - for (MasterInfo master : mLostMasters) { - masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), - master.getAddress(), master.getLastUpdatedTimeMs()); + for (MasterInfo master : masters) { + masterInfos[indexNum] = new alluxio.wire.MasterInfo(master.getId(), master.getAddress()) + .setLastUpdatedTimeMs(master.getLastUpdatedTimeMs()) + .setStartTimeMs(master.getStartTimeMs()) + .setLosePrimacyTimeMs(master.getLosePrimacyTimeMs()) + .setLastCheckpointTimeMs(master.getLastCheckpointTimeMs()) + .setJournalEntriesSinceCheckpoint(master.getJournalEntriesSinceCheckpoint()) + .setVersion(master.getVersion()) + .setRevision(master.getRevision()); indexNum++; } return masterInfos; @@ -583,7 +592,7 @@ public boolean isInSafeMode() { } @Override - public MetaCommand masterHeartbeat(long masterId) { + public MetaCommand masterHeartbeat(long masterId, MasterHeartbeatPOptions options) { MasterInfo master = mMasters.getFirstByField(ID_INDEX, masterId); if (master == null) { LOG.warn("Could not find master id: {} for heartbeat.", masterId); @@ -591,6 +600,12 @@ public MetaCommand masterHeartbeat(long masterId) { 
} master.updateLastUpdatedTimeMs(); + if (options.hasLastCheckpointTime()) { + master.setLastCheckpointTimeMs(options.getLastCheckpointTime()); + } + if (options.hasJournalEntriesSinceCheckpoint()) { + master.setJournalEntriesSinceCheckpoint(options.getJournalEntriesSinceCheckpoint()); + } return MetaCommand.MetaCommand_Nothing; } @@ -604,6 +619,18 @@ public void masterRegister(long masterId, RegisterMasterPOptions options) } master.updateLastUpdatedTimeMs(); + if (options.hasStartTimeMs()) { + master.setStartTimeMs(options.getStartTimeMs()); + } + if (options.hasLosePrimacyTimeMs()) { + master.setLosePrimacyTimeMs(options.getLosePrimacyTimeMs()); + } + if (options.hasVersion()) { + master.setVersion(options.getVersion()); + } + if (options.hasRevision()) { + master.setRevision(options.getRevision()); + } mMasterConfigStore.registerNewConf(master.getAddress(), options.getConfigsList()); diff --git a/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java b/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java index 5b7742620886..871db4e26464 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MasterInfo.java @@ -29,6 +29,18 @@ public final class MasterInfo { private final long mId; /** Master's last updated time in ms. */ private long mLastUpdatedTimeMs; + /** Master's start time in ms. */ + private long mStartTimeMs = 0; + /** Master's last lose primacy time in ms. */ + private long mLosePrimacyTimeMs = 0; + /** Master's version. */ + private String mVersion = ""; + /** Master's revision. */ + private String mRevision = ""; + /** Master's last checkpoint time in ms. */ + private long mLastCheckpointTimeMs = 0; + /** Number of journal entries since last checkpoint. */ + private long mJournalEntriesSinceCheckpoint = 0; /** * Creates a new instance of {@link MasterInfo}. 
@@ -63,10 +75,82 @@ public long getLastUpdatedTimeMs() { return mLastUpdatedTimeMs; } + /** + * @return the start time of the master in ms + */ + public long getStartTimeMs() { + return mStartTimeMs; + } + + /** + * @return the last lose primacy time of the master in ms + */ + public long getLosePrimacyTimeMs() { + return mLosePrimacyTimeMs; + } + + /** + * @return the version of the master + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the master + */ + public String getRevision() { + return mRevision; + } + + /** + * @return the time of last checkpoint + */ + public long getLastCheckpointTimeMs() { + return mLastCheckpointTimeMs; + } + + /** + * @return number of journal entries since last checkpoint + */ + public long getJournalEntriesSinceCheckpoint() { + return mJournalEntriesSinceCheckpoint; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) - .add("lastUpdatedTimeMs", mLastUpdatedTimeMs).toString(); + .add("lastUpdatedTimeMs", mLastUpdatedTimeMs).add("startTimeMs", mStartTimeMs) + .add("losePrimacyTimeMs", mLosePrimacyTimeMs) + .add("version", mVersion).add("revision", mRevision).toString(); + } + + /** + * @param startTimeMs the start time of the master in ms + */ + public void setStartTimeMs(long startTimeMs) { + mStartTimeMs = startTimeMs; + } + + /** + * @param losePrimacyTimeMs the last primacy state change time of the master in ms + */ + public void setLosePrimacyTimeMs(long losePrimacyTimeMs) { + mLosePrimacyTimeMs = losePrimacyTimeMs; + } + + /** + * @param version the version of the master + */ + public void setVersion(String version) { + mVersion = version; + } + + /** + * @param revision the revision of the master + */ + public void setRevision(String revision) { + mRevision = revision; } /** @@ -75,4 +159,18 @@ public String toString() { public void updateLastUpdatedTimeMs() { mLastUpdatedTimeMs = System.currentTimeMillis(); } + 
+ /** + * @param lastCheckpointTimeMs the time of last checkpoint + */ + public void setLastCheckpointTimeMs(long lastCheckpointTimeMs) { + mLastCheckpointTimeMs = lastCheckpointTimeMs; + } + + /** + * @param journalEntriesSinceCheckpoint number of journal entries since last checkpoint + */ + public void setJournalEntriesSinceCheckpoint(long journalEntriesSinceCheckpoint) { + mJournalEntriesSinceCheckpoint = journalEntriesSinceCheckpoint; + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java index 3038e7830ca4..3dbb7e33318e 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java @@ -15,6 +15,7 @@ import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.GetConfigurationPOptions; +import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; import alluxio.grpc.RegisterMasterPOptions; import alluxio.master.Master; @@ -99,6 +100,11 @@ void setPathConfiguration(String path, Map properties) */ boolean getNewerVersionAvailable(); + /** + * @return the address of this master + */ + Address getMasterAddress(); + /** * @return the addresses of live masters */ @@ -133,12 +139,12 @@ void setPathConfiguration(String path, Map properties) int getWebPort(); /** - * @return a array of {@link MasterInfo}s of masters + * @return an array of {@link MasterInfo} of standby masters */ - MasterInfo[] getMasterInfos(); + MasterInfo[] getStandbyMasterInfos(); /** - * @return a array of {@link MasterInfo}s of lost masters + * @return an array of {@link MasterInfo} of lost masters */ MasterInfo[] getLostMasterInfos(); @@ -156,9 +162,10 @@ void setPathConfiguration(String path, Map properties) * A standby master periodically heartbeats with the leader master. 
* * @param masterId the master id + * @param options the options that contains optional master info * @return an optional command for the standby master to execute */ - MetaCommand masterHeartbeat(long masterId); + MetaCommand masterHeartbeat(long masterId, MasterHeartbeatPOptions options); /** * A standby master registers with the leader master. diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java index 4af0992906a2..ea5a4eddf631 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterMasterServiceHandler.java @@ -68,8 +68,8 @@ public void registerMaster(RegisterMasterPRequest request, @Override public void masterHeartbeat(MasterHeartbeatPRequest request, StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> MasterHeartbeatPResponse.newBuilder() - .setCommand(mMetaMaster.masterHeartbeat(request.getMasterId())).build(), + RpcUtils.call(LOG, () -> MasterHeartbeatPResponse.newBuilder().setCommand( + mMetaMaster.masterHeartbeat(request.getMasterId(), request.getOptions())).build(), "masterHeartbeat", "request=%s", responseObserver, request); } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java b/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java index 3f155c9185b5..44bde127afe1 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java +++ b/core/server/master/src/main/java/alluxio/master/meta/RetryHandlingMetaMasterMasterClient.java @@ -13,8 +13,10 @@ import alluxio.AbstractMasterClient; import alluxio.Constants; +import alluxio.ProjectConstants; import alluxio.grpc.ConfigProperty; import alluxio.grpc.GetMasterIdPRequest; +import 
alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MasterHeartbeatPRequest; import alluxio.grpc.MetaCommand; import alluxio.grpc.MetaMasterMasterServiceGrpc; @@ -22,13 +24,17 @@ import alluxio.grpc.RegisterMasterPRequest; import alluxio.grpc.ServiceType; import alluxio.master.MasterClientContext; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; import alluxio.wire.Address; +import com.codahale.metrics.Gauge; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.List; +import java.util.Map; import javax.annotation.concurrent.ThreadSafe; /** @@ -90,8 +96,22 @@ public long getId(final Address address) throws IOException { * @return whether this master should re-register */ public MetaCommand heartbeat(final long masterId) throws IOException { + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + Gauge lastCheckpointGauge = gauges + .get(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); + Gauge journalEntriesGauge = gauges + .get(MetricKey.MASTER_JOURNAL_ENTRIES_SINCE_CHECKPOINT.getName()); + MasterHeartbeatPOptions.Builder optionsBuilder = MasterHeartbeatPOptions.newBuilder(); + if (lastCheckpointGauge != null) { + optionsBuilder.setLastCheckpointTime((long) lastCheckpointGauge.getValue()); + } + if (journalEntriesGauge != null) { + optionsBuilder.setJournalEntriesSinceCheckpoint((long) journalEntriesGauge.getValue()); + } + return retryRPC(() -> mClient - .masterHeartbeat(MasterHeartbeatPRequest.newBuilder().setMasterId(masterId).build()) + .masterHeartbeat(MasterHeartbeatPRequest.newBuilder().setMasterId(masterId) + .setOptions(optionsBuilder).build()) .getCommand(), LOG, "Heartbeat", "masterId=%d", masterId); } @@ -103,10 +123,22 @@ public MetaCommand heartbeat(final long masterId) throws IOException { */ public void register(final long masterId, final List configList) throws IOException { + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + 
RegisterMasterPOptions.Builder optionsBuilder = RegisterMasterPOptions.newBuilder() + .addAllConfigs(configList) + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION); + Gauge startTimeGauge = gauges.get(MetricKey.MASTER_START_TIME.getName()); + if (startTimeGauge != null) { + optionsBuilder.setStartTimeMs((long) startTimeGauge.getValue()); + } + Gauge lastLosePrimacyGuage = gauges.get(MetricKey.MASTER_LAST_LOSE_PRIMACY_TIME.getName()); + if (lastLosePrimacyGuage != null) { + optionsBuilder.setLosePrimacyTimeMs((long) lastLosePrimacyGuage.getValue()); + } retryRPC(() -> { mClient.registerMaster(RegisterMasterPRequest.newBuilder().setMasterId(masterId) - .setOptions(RegisterMasterPOptions.newBuilder().addAllConfigs(configList).build()) - .build()); + .setOptions(optionsBuilder).build()); return null; }, LOG, "Register", "masterId=%d,configList=%s", masterId, configList); } diff --git a/core/transport/src/main/proto/grpc/meta_master.proto b/core/transport/src/main/proto/grpc/meta_master.proto index c21022e66378..5ec40ce402bd 100644 --- a/core/transport/src/main/proto/grpc/meta_master.proto +++ b/core/transport/src/main/proto/grpc/meta_master.proto @@ -237,6 +237,10 @@ enum MetaCommand { message RegisterMasterPOptions { repeated grpc.ConfigProperty configs = 1; + optional int64 startTimeMs = 2; + optional int64 losePrimacyTimeMs = 3; + optional string version = 4; + optional string revision = 5; } message RegisterMasterPRequest { optional int64 masterId = 1; @@ -244,7 +248,10 @@ message RegisterMasterPRequest { } message RegisterMasterPResponse {} -message MasterHeartbeatPOptions {} +message MasterHeartbeatPOptions { + optional int64 lastCheckpointTime = 1; + optional int64 journalEntriesSinceCheckpoint = 2; +} message MasterHeartbeatPRequest { optional int64 masterId = 1; optional MasterHeartbeatPOptions options = 2; diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 
46d8959a7381..e135ff873172 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -6168,6 +6168,26 @@ "name": "configs", "type": "grpc.ConfigProperty", "is_repeated": true + }, + { + "id": 2, + "name": "startTimeMs", + "type": "int64" + }, + { + "id": 3, + "name": "losePrimacyTimeMs", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "string" + }, + { + "id": 5, + "name": "revision", + "type": "string" } ] }, @@ -6190,7 +6210,19 @@ "name": "RegisterMasterPResponse" }, { - "name": "MasterHeartbeatPOptions" + "name": "MasterHeartbeatPOptions", + "fields": [ + { + "id": 1, + "name": "lastCheckpointTime", + "type": "int64" + }, + { + "id": 2, + "name": "journalEntriesSinceCheckpoint", + "type": "int64" + } + ] }, { "name": "MasterHeartbeatPRequest", @@ -10683,4 +10715,4 @@ } } ] -} \ No newline at end of file +} diff --git a/webui/master/src/constants/types/IMasterInfo.tsx b/webui/master/src/constants/types/IMasterInfo.tsx index 001b1ecdcf56..163631523c84 100644 --- a/webui/master/src/constants/types/IMasterInfo.tsx +++ b/webui/master/src/constants/types/IMasterInfo.tsx @@ -15,5 +15,12 @@ export interface IMasterInfo { host: string; }; id: string; - lastUpdatedTimeMs: number; + lastUpdatedTime: string; + startTime: string; + gainPrimacyTime: string; + losePrimacyTime: string; + lastCheckpointTime: string; + journalEntriesSinceCheckpoint: number; + version: string; + revision: string; } diff --git a/webui/master/src/containers/pages/Masters/Masters.tsx b/webui/master/src/containers/pages/Masters/Masters.tsx index 21f5bd3420b3..c6b45335512a 100644 --- a/webui/master/src/containers/pages/Masters/Masters.tsx +++ b/webui/master/src/containers/pages/Masters/Masters.tsx @@ -43,18 +43,30 @@ export class MastersPresenter extends React.Component {
-
Leader Master
+
Primary Master
- - + + + + + + + + - - + + + + + + + +
Master HostMaster PortHostPortElectedStartedLast CheckpointJournal EntriesVersionRevision
{mastersData.leaderMasterInfo.address.host}{mastersData.leaderMasterInfo.address.rpcPort}{mastersData.primaryMasterInfo.address.host}{mastersData.primaryMasterInfo.address.rpcPort}{mastersData.primaryMasterInfo.gainPrimacyTime}{mastersData.primaryMasterInfo.startTime}{mastersData.primaryMasterInfo.lastCheckpointTime}{mastersData.primaryMasterInfo.journalEntriesSinceCheckpoint}{mastersData.primaryMasterInfo.version}{mastersData.primaryMasterInfo.revision}
@@ -67,18 +79,30 @@ export class MastersPresenter extends React.Component { {initData.debug ? [D]Master Id : null} - Master Host - Master Port + Host + Port Last Heartbeat + Started + Step-down Time + Last Checkpoint + Journal Entries + Version + Revision - {mastersData.normalMasterInfos.map((masterInfo: IMasterInfo) => ( + {mastersData.standbyMasterInfos.map((masterInfo: IMasterInfo) => ( {initData.debug ? {masterInfo.id} : null} {masterInfo.address.host} {masterInfo.address.rpcPort} - {new Date(masterInfo.lastUpdatedTimeMs).toTimeString()} + {masterInfo.lastUpdatedTime} + {masterInfo.startTime} + {masterInfo.losePrimacyTime} + {masterInfo.lastCheckpointTime} + {masterInfo.journalEntriesSinceCheckpoint} + {masterInfo.version} + {masterInfo.revision} ))} @@ -92,18 +116,30 @@ export class MastersPresenter extends React.Component { {initData.debug ? [D]Master Id : null} - Master Host - Master Port + Host + Port Last Heartbeat + Started + Step-down Time + Last Checkpoint + Journal Entries + Version + Revision - {mastersData.failedMasterInfos.map((masterInfo: IMasterInfo) => ( + {mastersData.lostMasterInfos.map((masterInfo: IMasterInfo) => ( {initData.debug ? 
{masterInfo.id} : null} {masterInfo.address.host} {masterInfo.address.rpcPort} - {new Date(masterInfo.lastUpdatedTimeMs).toTimeString()} + {masterInfo.lastUpdatedTime} + {masterInfo.startTime} + {masterInfo.losePrimacyTime} + {masterInfo.lastCheckpointTime} + {masterInfo.journalEntriesSinceCheckpoint} + {masterInfo.version} + {masterInfo.revision} ))} diff --git a/webui/master/src/containers/pages/Masters/__snapshots__/Masters.test.tsx.snap b/webui/master/src/containers/pages/Masters/__snapshots__/Masters.test.tsx.snap index 7d9e1f04f38b..1eac96bd383f 100644 --- a/webui/master/src/containers/pages/Masters/__snapshots__/Masters.test.tsx.snap +++ b/webui/master/src/containers/pages/Masters/__snapshots__/Masters.test.tsx.snap @@ -14,7 +14,7 @@ exports[`Masters Shallow component Matches snapshot 1`] = ` className="col-12" >
- Leader Master + Primary Master
+ + + + + + @@ -37,6 +55,14 @@ exports[`Masters Shallow component Matches snapshot 1`] = ` + +
- Master Host + Host - Master Port + Port + + Elected + + Started + + Last Checkpoint + + Journal Entries + + Version + + Revision
0 + + + + 0 + +
@@ -59,14 +85,32 @@ exports[`Masters Shallow component Matches snapshot 1`] = ` - Master Host + Host - Master Port + Port Last Heartbeat + + Started + + + Step-down Time + + + Last Checkpoint + + + Journal Entries + + + Version + + + Revision + @@ -90,14 +134,32 @@ exports[`Masters Shallow component Matches snapshot 1`] = ` - Master Host + Host - Master Port + Port Last Heartbeat + + Started + + + Step-down Time + + + Last Checkpoint + + + Journal Entries + + + Version + + + Revision + diff --git a/webui/master/src/store/masters/reducer.tsx b/webui/master/src/store/masters/reducer.tsx index 7bf5282324a6..b2fc65e6661f 100644 --- a/webui/master/src/store/masters/reducer.tsx +++ b/webui/master/src/store/masters/reducer.tsx @@ -16,15 +16,22 @@ import { IMastersState, MastersActionTypes } from './types'; export const initialMastersState: IMastersState = { data: { debug: false, - failedMasterInfos: [], - normalMasterInfos: [], - leaderMasterInfo: { + lostMasterInfos: [], + standbyMasterInfos: [], + primaryMasterInfo: { address: { rpcPort: 0, host: '', }, id: '', - lastUpdatedTimeMs: 0, + lastUpdatedTime: '', + startTime: '', + gainPrimacyTime: '', + losePrimacyTime: '', + lastCheckpointTime: '', + journalEntriesSinceCheckpoint: 0, + version: '', + revision: '', }, }, errors: undefined, diff --git a/webui/master/src/store/masters/types.tsx b/webui/master/src/store/masters/types.tsx index a9e6d672784d..bf2325d0efd5 100644 --- a/webui/master/src/store/masters/types.tsx +++ b/webui/master/src/store/masters/types.tsx @@ -15,9 +15,9 @@ import { IMasterInfo } from '../../constants'; export interface IMasters { debug: boolean; - failedMasterInfos: IMasterInfo[]; - normalMasterInfos: IMasterInfo[]; - leaderMasterInfo: IMasterInfo; + lostMasterInfos: IMasterInfo[]; + standbyMasterInfos: IMasterInfo[]; + primaryMasterInfo: IMasterInfo; } export enum MastersActionTypes { From d231f017ba4f975ba52b2184ea3234b200da027a Mon Sep 17 00:00:00 2001 From: Shawn Sun 
<32376495+ssz1997@users.noreply.github.com> Date: Tue, 28 Feb 2023 10:50:37 -0800 Subject: [PATCH 150/334] Revert CSI package upgrade With the CSI packages upgrade we need a newer version of go to compile CSI. An upgrade of go version may introduce more issues, so for unblocking release purpose, revert the upgrade for now. pr-link: Alluxio/alluxio#16972 change-id: cid-f05542972c44f108dff6950d5d80532d549a8ba1 --- integration/docker/csi/go.mod | 6 +---- integration/docker/csi/go.sum | 46 +++++------------------------------ 2 files changed, 7 insertions(+), 45 deletions(-) diff --git a/integration/docker/csi/go.mod b/integration/docker/csi/go.mod index 32749147576c..f4940343b0af 100644 --- a/integration/docker/csi/go.mod +++ b/integration/docker/csi/go.mod @@ -7,12 +7,8 @@ require ( github.com/golang/glog v0.0.0-20210429001901-424d2337a529 github.com/kubernetes-csi/csi-lib-utils v0.7.0 // indirect github.com/kubernetes-csi/drivers v1.0.2 - github.com/pkg/errors v0.8.1 github.com/spf13/cobra v1.1.3 - golang.org/x/net v0.7.0 + golang.org/x/net v0.0.0-20210510120150-4163338589ed google.golang.org/grpc v1.37.1 - k8s.io/api v0.17.0 - k8s.io/apimachinery v0.17.1-beta.0 - k8s.io/client-go v0.17.0 k8s.io/mount-utils v0.21.0 ) diff --git a/integration/docker/csi/go.sum b/integration/docker/csi/go.sum index 276577ab9d63..ad980d43f551 100644 --- a/integration/docker/csi/go.sum +++ b/integration/docker/csi/go.sum @@ -80,7 +80,6 @@ github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dp github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod 
h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v0.0.0-20210429001901-424d2337a529 h1:2voWjNECnrZRbfwXxHB1/j8wa6xdKn85B5NzgVL/pTU= @@ -111,7 +110,6 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= -github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -122,7 +120,6 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= -github.com/googleapis/gnostic v0.2.0 h1:l6N3VoaVzTncYYW+9yOz2LJJammFZGBO13sqgEhpy9g= github.com/googleapis/gnostic v0.2.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -152,14 +149,12 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m github.com/hashicorp/memberlist v0.1.3/go.mod 
h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.8 h1:QiWkFLKq0T7mpzwOTu6BzNDbfTE8OLrYhVKYMLF46Ok= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -193,11 +188,9 @@ github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0Qu github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 
v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= @@ -215,7 +208,6 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -268,7 +260,6 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io 
v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -282,8 +273,6 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -302,7 +291,6 @@ golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -321,20 +309,16 @@ golang.org/x/net 
v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.0.0-20210510120150-4163338589ed h1:p9UgmWI9wKpfYmgaV/IZKGdXc5qEK45tDwwwDyjS26I= +golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -354,25 +338,16 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220220014-0732a990476f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -394,8 +369,6 @@ golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -407,7 +380,6 @@ google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -447,7 +419,6 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8X gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -456,7 +427,6 @@ gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bl gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 
-gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -465,12 +435,9 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -k8s.io/api v0.17.0 h1:H9d/lw+VkZKEVIUc8F3wgiQ+FUXTTr21M87jXLU7yqM= k8s.io/api v0.17.0/go.mod h1:npsyOePkeP0CPwyGfXDHxvypiYMJxBWAMpQxCaJ4ZxI= k8s.io/apimachinery v0.17.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= -k8s.io/apimachinery v0.17.1-beta.0 h1:0Wl/KpAiFOMe9to5h8x2Y6JnjV+BEWJiTcUk1Vx7zdE= k8s.io/apimachinery v0.17.1-beta.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= -k8s.io/client-go v0.17.0 h1:8QOGvUGdqDMFrm9sD6IUFl256BcffynGoe80sxgTEDg= k8s.io/client-go v0.17.0/go.mod h1:TYgR6EUHs6k45hb6KWjVD6jFZvJV4gHDikv/It0xz+k= k8s.io/component-base v0.17.0/go.mod h1:rKuRAokNMY2nn2A6LP/MiwpoaMRHpfRnrPaUJJj1Yoc= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= @@ -489,5 +456,4 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= -sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= 
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= From db672547c0fe26b5eb433894cda7c8e0d56b7a53 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Tue, 28 Feb 2023 16:19:54 -0800 Subject: [PATCH 151/334] Update registry for CSI images The old registry k8s.gcr.io will be frozen early April 2023. The new registry registry.k8s.io will replace the old one. Thus updating the registry for csi images. Source: https://kubernetes.io/blog/2023/02/06/k8s-gcr-io-freeze-announcement/ pr-link: Alluxio/alluxio#16973 change-id: cid-b386c072f2a48c596ac8f9ff4a4c3c0eb9739990 --- integration/kubernetes/helm-chart/alluxio/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/kubernetes/helm-chart/alluxio/values.yaml b/integration/kubernetes/helm-chart/alluxio/values.yaml index 000120b2df78..c5ab77c976e4 100644 --- a/integration/kubernetes/helm-chart/alluxio/values.yaml +++ b/integration/kubernetes/helm-chart/alluxio/values.yaml @@ -691,7 +691,7 @@ csi: dnsPolicy: ClusterFirstWithHostNet provisioner: # for kubernetes 1.17 or above - image: k8s.gcr.io/sig-storage/csi-provisioner:v2.0.5 + image: registry.k8s.io/sig-storage/csi-provisioner:v2.0.5 resources: limits: cpu: 100m @@ -723,7 +723,7 @@ csi: cpu: "1" memory: "1Gi" driverRegistrar: - image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.0.0 + image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.0.0 resources: limits: cpu: 100m From 38bf6fef6699a2f84083fb692f2532dbe7e2b045 Mon Sep 17 00:00:00 2001 From: kimsu98 <40134677+kimsu98@users.noreply.github.com> Date: Tue, 28 Feb 2023 17:21:05 -0800 Subject: [PATCH 152/334] [DOCFIX] Fix docGen and update config/metrics ### What changes are proposed in this pull request? Fix docGen and update configuration/metrics Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? 
To keep propertykeys and metrics updated Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? yes. Docs. Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16976 change-id: cid-f394b134bf020f7c6065af303ce09a4fac5cfee9 --- core/transport/src/main/proto/proto.lock | 7 ++++- docs/_data/table/common-configuration.csv | 18 ++++++++++-- docs/_data/table/en/common-configuration.yml | 30 +++++++++++++++++++- docs/_data/table/en/master-metrics.yml | 16 +++++++++-- docs/_data/table/en/user-configuration.yml | 14 +++++++-- docs/_data/table/en/worker-configuration.yml | 4 +++ docs/_data/table/en/worker-metrics.yml | 2 ++ docs/_data/table/master-configuration.csv | 2 +- docs/_data/table/master-metrics.csv | 6 ++++ docs/_data/table/user-configuration.csv | 6 +++- docs/_data/table/worker-configuration.csv | 2 ++ docs/_data/table/worker-metrics.csv | 1 + 12 files changed, 97 insertions(+), 11 deletions(-) diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index e135ff873172..62a094bc02bf 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2751,6 +2751,11 @@ "id": 6, "name": "disableAreDescendantsLoadedCheck", "type": "bool" + }, + { + "id": 7, + "name": "excludeMountInfo", + "type": "bool" } ] }, @@ -10715,4 +10720,4 @@ } } ] -} +} \ No newline at end of file diff --git a/docs/_data/table/common-configuration.csv b/docs/_data/table/common-configuration.csv index 45b3c91a7b2f..7ce38ca3f23b 100644 --- a/docs/_data/table/common-configuration.csv +++ b/docs/_data/table/common-configuration.csv @@ -49,7 +49,7 @@ alluxio.job.master.web.hostname,"${alluxio.job.master.hostname}" 
alluxio.job.master.web.port,"20002" alluxio.job.master.worker.heartbeat.interval,"1sec" alluxio.job.master.worker.timeout,"60sec" -alluxio.job.request.batch.size,"20" +alluxio.job.request.batch.size,"1" alluxio.job.retention.time,"1d" alluxio.job.worker.bind.host,"0.0.0.0" alluxio.job.worker.data.port,"30002" @@ -87,13 +87,17 @@ alluxio.proxy.s3.complete.multipart.upload.keepalive.time.interval,"30sec" alluxio.proxy.s3.complete.multipart.upload.min.part.size,"5MB" alluxio.proxy.s3.complete.multipart.upload.pool.size,"20" alluxio.proxy.s3.deletetype,"ALLUXIO_AND_UFS" +alluxio.proxy.s3.global.read.rate.limit.mb,"0" alluxio.proxy.s3.header.metadata.max.size,"2KB" alluxio.proxy.s3.multipart.upload.cleaner.enabled,"true" alluxio.proxy.s3.multipart.upload.cleaner.pool.size,"1" alluxio.proxy.s3.multipart.upload.cleaner.retry.count,"3" alluxio.proxy.s3.multipart.upload.cleaner.retry.delay,"10sec" alluxio.proxy.s3.multipart.upload.cleaner.timeout,"10min" +alluxio.proxy.s3.single.connection.read.rate.limit.mb,"0" alluxio.proxy.s3.tagging.restrictions.enabled,"true" +alluxio.proxy.s3.v2.async.processing.enabled,"false" +alluxio.proxy.s3.v2.version.enabled,"false" alluxio.proxy.s3.writetype,"CACHE_THROUGH" alluxio.proxy.stream.cache.timeout,"1hour" alluxio.proxy.web.bind.host,"0.0.0.0" @@ -106,6 +110,7 @@ alluxio.site.conf.dir,"${alluxio.conf.dir}/,${user.home}/.alluxio/,/etc/alluxio/ alluxio.site.conf.rocks.block.file,"" alluxio.site.conf.rocks.inode.file,"" alluxio.standalone.fuse.jvm.monitor.enabled,"false" +alluxio.standby.master.grpc.enabled,"false" alluxio.standby.master.metrics.sink.enabled,"false" alluxio.standby.master.web.enabled,"false" alluxio.table.catalog.path,"/catalog" @@ -135,7 +140,7 @@ alluxio.underfs.cephfs.mount.uid,"0" alluxio.underfs.cleanup.enabled,"false" alluxio.underfs.cleanup.interval,"1day" alluxio.underfs.eventual.consistency.retry.base.sleep,"50ms" -alluxio.underfs.eventual.consistency.retry.max.num,"20" 
+alluxio.underfs.eventual.consistency.retry.max.num,"0" alluxio.underfs.eventual.consistency.retry.max.sleep,"30sec" alluxio.underfs.gcs.default.mode,"0700" alluxio.underfs.gcs.directory.suffix,"/" @@ -164,12 +169,21 @@ alluxio.underfs.object.store.mount.shared.publicly,"false" alluxio.underfs.object.store.multi.range.chunk.size,"${alluxio.user.block.size.bytes.default}" alluxio.underfs.object.store.service.threads,"20" alluxio.underfs.object.store.skip.parent.directory.creation,"true" +alluxio.underfs.object.store.streaming.upload.part.timeout,"" +alluxio.underfs.obs.intermediate.upload.clean.age,"3day" +alluxio.underfs.obs.streaming.upload.enabled,"false" +alluxio.underfs.obs.streaming.upload.partition.size,"64MB" +alluxio.underfs.obs.streaming.upload.threads,"20" alluxio.underfs.oss.connection.max,"1024" alluxio.underfs.oss.connection.timeout,"50sec" alluxio.underfs.oss.connection.ttl,"-1" alluxio.underfs.oss.ecs.ram.role,"" +alluxio.underfs.oss.intermediate.upload.clean.age,"3day" alluxio.underfs.oss.retry.max,"3" alluxio.underfs.oss.socket.timeout,"50sec" +alluxio.underfs.oss.streaming.upload.enabled,"false" +alluxio.underfs.oss.streaming.upload.partition.size,"64MB" +alluxio.underfs.oss.streaming.upload.threads,"20" alluxio.underfs.oss.sts.ecs.metadata.service.endpoint,"http://100.100.100.200/latest/meta-data/ram/security-credentials/" alluxio.underfs.oss.sts.enabled,"false" alluxio.underfs.oss.sts.token.refresh.interval.ms,"30m" diff --git a/docs/_data/table/en/common-configuration.yml b/docs/_data/table/en/common-configuration.yml index 51c7b5273aa8..adac718283d0 100644 --- a/docs/_data/table/en/common-configuration.yml +++ b/docs/_data/table/en/common-configuration.yml @@ -174,6 +174,8 @@ alluxio.proxy.s3.complete.multipart.upload.pool.size: 'The complete multipart upload thread pool size.' alluxio.proxy.s3.deletetype: 'Delete type when deleting buckets and objects through S3 API. 
Valid options are `ALLUXIO_AND_UFS` (delete both in Alluxio and UFS), `ALLUXIO_ONLY` (delete only the buckets or objects in Alluxio namespace).' +alluxio.proxy.s3.global.read.rate.limit.mb: + 'Limit the maximum read speed for all connections. Set value less than or equal to 0 to disable rate limits.' alluxio.proxy.s3.header.metadata.max.size: 'The maximum size to allow for user-defined metadata in S3 PUTrequest headers. Set to 0 to disable size limits.' alluxio.proxy.s3.multipart.upload.cleaner.enabled: @@ -186,8 +188,14 @@ alluxio.proxy.s3.multipart.upload.cleaner.retry.delay: 'The retry delay time when aborting a multipart upload fails.' alluxio.proxy.s3.multipart.upload.cleaner.timeout: 'The timeout for aborting proxy s3 multipart upload automatically.' +alluxio.proxy.s3.single.connection.read.rate.limit.mb: + 'Limit the maximum read speed for each connection. Set value less than or equal to 0 to disable rate limits.' alluxio.proxy.s3.tagging.restrictions.enabled: 'Toggles whether or not the Alluxio S3 API will enforce AWS S3 tagging restrictions (10 tags, 128 character keys, 256 character values) See https://docs.aws.amazon.com/AmazonS3/latest/userguide/tagging-managing.html.' +alluxio.proxy.s3.v2.async.processing.enabled: + '(Experimental) If enabled, handle S3 request in async mode when v2 version of Alluxio s3 proxy service is enabled.' +alluxio.proxy.s3.v2.version.enabled: + '(Experimental) V2, an optimized version of Alluxio s3 proxy service.' alluxio.proxy.s3.writetype: 'Write type when creating buckets and objects through S3 API. Valid options are `MUST_CACHE` (write will only go to Alluxio and must be stored in Alluxio), `CACHE_THROUGH` (try to cache, write to UnderFS synchronously), `ASYNC_THROUGH` (try to cache, write to UnderFS asynchronously), `THROUGH` (no cache, write to UnderFS synchronously).' alluxio.proxy.stream.cache.timeout: @@ -212,6 +220,8 @@ alluxio.site.conf.rocks.inode.file: 'Path of file containing RocksDB inode store configuration. 
A template configuration cab be found at ${alluxio.conf.dir}/rocks-inode.ini.template. See https://github.com/facebook/rocksdb/blob/main/examples/rocksdb_option_file_example.ini for more information on RocksDB configuration files. If unset then a default configuration will be used.' alluxio.standalone.fuse.jvm.monitor.enabled: 'Whether to enable start JVM monitor thread on the standalone fuse process. This will start a thread to detect JVM-wide pauses induced by GC or other reasons.' +alluxio.standby.master.grpc.enabled: + 'Whether a standby master runs a grpc server' alluxio.standby.master.metrics.sink.enabled: 'Whether a standby master runs the metric sink' alluxio.standby.master.web.enabled: @@ -271,7 +281,7 @@ alluxio.underfs.cleanup.interval: alluxio.underfs.eventual.consistency.retry.base.sleep: 'To handle eventually consistent storage semantics for certain under storages, Alluxio will perform retries when under storage metadata doesn''t match Alluxio''s expectations. These retries use exponential backoff. This property determines the base time for the exponential backoff.' alluxio.underfs.eventual.consistency.retry.max.num: - 'To handle eventually consistent storage semantics for certain under storages, Alluxio will perform retries when under storage metadata doesn''t match Alluxio''s expectations. These retries use exponential backoff. This property determines the maximum number of retries.' + 'To handle eventually consistent storage semantics for certain under storages, Alluxio will perform retries when under storage metadata doesn''t match Alluxio''s expectations. These retries use exponential backoff. This property determines the maximum number of retries. This property defaults to 0 as modern object store UFSs provide strong consistency.' 
alluxio.underfs.eventual.consistency.retry.max.sleep: 'To handle eventually consistent storage semantics for certain under storages, Alluxio will perform retries when under storage metadata doesn''t match Alluxio''s expectations. These retries use exponential backoff. This property determines the maximum wait time in the backoff.' alluxio.underfs.gcs.default.mode: @@ -328,6 +338,16 @@ alluxio.underfs.object.store.service.threads: 'The number of threads in executor pool for parallel object store UFS operations, such as directory renames and deletes.' alluxio.underfs.object.store.skip.parent.directory.creation: 'Do not create parent directory for new files. Object stores generally uses prefix which is not required for creating new files. Skipping parent directory is recommended for better performance. Set this to false if the object store requires prefix creation for new files.' +alluxio.underfs.object.store.streaming.upload.part.timeout: + 'Timeout for uploading part when using streaming uploads.' +alluxio.underfs.obs.intermediate.upload.clean.age: + 'Streaming uploads may not have been completed/aborted correctly and need periodical ufs cleanup. If ufs cleanup is enabled, intermediate multipart uploads in all non-readonly OBS mount points older than this age will be cleaned. This may impact other ongoing upload operations, so a large clean age is encouraged.' +alluxio.underfs.obs.streaming.upload.enabled: + '(Experimental) If true, using streaming upload to write to OBS.' +alluxio.underfs.obs.streaming.upload.partition.size: + 'Maximum allowable size of a single buffer file when using S3A streaming upload. When the buffer file reaches the partition size, it will be uploaded and the upcoming data will write to other buffer files.If the partition size is too small, OBS upload speed might be affected. ' +alluxio.underfs.obs.streaming.upload.threads: + 'the number of threads to use for streaming upload data to OBS.' 
alluxio.underfs.oss.connection.max: 'The maximum number of OSS connections.' alluxio.underfs.oss.connection.timeout: @@ -336,10 +356,18 @@ alluxio.underfs.oss.connection.ttl: 'The TTL of OSS connections in ms.' alluxio.underfs.oss.ecs.ram.role: 'The RAM role of current owner of ECS.' +alluxio.underfs.oss.intermediate.upload.clean.age: + 'Streaming uploads may not have been completed/aborted correctly and need periodical ufs cleanup. If ufs cleanup is enabled, intermediate multipart uploads in all non-readonly OSS mount points older than this age will be cleaned. This may impact other ongoing upload operations, so a large clean age is encouraged.' alluxio.underfs.oss.retry.max: 'The maximum number of OSS error retry.' alluxio.underfs.oss.socket.timeout: 'The timeout of OSS socket.' +alluxio.underfs.oss.streaming.upload.enabled: + '(Experimental) If true, using streaming upload to write to OSS.' +alluxio.underfs.oss.streaming.upload.partition.size: + 'Maximum allowable size of a single buffer file when using OSS streaming upload. When the buffer file reaches the partition size, it will be uploaded and the upcoming data will write to other buffer files.If the partition size is too small, OSS upload speed might be affected. ' +alluxio.underfs.oss.streaming.upload.threads: + 'the number of threads to use for streaming upload data to OSS.' alluxio.underfs.oss.sts.ecs.metadata.service.endpoint: 'The ECS metadata service endpoint for Aliyun STS' alluxio.underfs.oss.sts.enabled: diff --git a/docs/_data/table/en/master-metrics.yml b/docs/_data/table/en/master-metrics.yml index ed6fcedccd19..3ab1a4aa6ded 100644 --- a/docs/_data/table/en/master-metrics.yml +++ b/docs/_data/table/en/master-metrics.yml @@ -71,9 +71,9 @@ Master.FilesFreed: Master.FilesPersisted: 'Total number of successfully persisted files' Master.FilesPinned: - 'Total number of currently pinned files' + 'Total number of currently pinned files. Note that IDs for these files are stored in memory.' 
Master.FilesToBePersisted: - 'Total number of currently to be persisted files' + 'Total number of currently to be persisted files. Note that the IDs for these files are stored in memory.' Master.FreeFileOps: 'Total number of FreeFile operations' Master.GetFileBlockInfoOps: @@ -162,6 +162,10 @@ Master.LastBackupRestoreTimeMs: 'The process time of the last restore from backup' Master.LastBackupTimeMs: 'The process time of the last backup' +Master.LastGainPrimacyTime: + 'Last time the master gains primacy' +Master.LastLosePrimacyTime: + 'Last time the master loses primacy' Master.ListingCacheEvictions: 'The total number of evictions in master listing cache' Master.ListingCacheHits: @@ -246,6 +250,8 @@ Master.RenamePathOps: 'Total number of Rename operations' Master.ReplicaMgmtActiveJobSize: 'Number of active block replication/eviction jobs. These jobs are created by the master to maintain the block replica factor. The value is an estimate with lag. ' +Master.ReplicationLimitedFiles: + 'Number of files that have a replication count set to a non-default value. Note that these files have IDs that are stored in memory.' Master.RocksBlockBackgroundErrors: 'RocksDB block table. Accumulated number of background errors.' Master.RocksBlockBlockCacheCapacity: @@ -352,6 +358,12 @@ Master.SetAclOps: 'Total number of SetAcl operations' Master.SetAttributeOps: 'Total number of SetAttribute operations' +Master.StartTime: + 'The start time of the master process' +Master.TTLBuckets: + 'The number of TTL buckets at the master. Note that these buckets are stored in memory.' +Master.TTLInodes: + 'The total number of inodes contained in TTL buckets at the mater. Note that these inodes are stored in memory.' Master.ToRemoveBlockCount: 'Count of block replicas to be removed from the workers. If 1 block is to be removed from 2 workers, 2 will be counted here.' 
Master.TotalPaths: diff --git a/docs/_data/table/en/user-configuration.yml b/docs/_data/table/en/user-configuration.yml index 65265e01fe3c..61efaf3f1eb7 100644 --- a/docs/_data/table/en/user-configuration.yml +++ b/docs/_data/table/en/user-configuration.yml @@ -88,6 +88,12 @@ alluxio.user.client.cache.timeout.duration: 'The timeout duration for local cache I/O operations (reading/writing/deleting). When this property is a positive value,local cache operations after timing out will fail and fallback to external file system but transparent to applications; when this property is a negative value, this feature is disabled.' alluxio.user.client.cache.timeout.threads: 'The number of threads to handle cache I/O operation timeout, when alluxio.user.client.cache.timeout.duration is positive.' +alluxio.user.client.cache.ttl.check.interval.seconds: + 'TTL check interval time in seconds.' +alluxio.user.client.cache.ttl.enabled: + 'Whether to support cache quota.' +alluxio.user.client.cache.ttl.threshold.seconds: + 'TTL threshold time in seconds.' alluxio.user.client.report.version.enabled: 'Whether the client reports version information to the server.' alluxio.user.conf.cluster.default.enabled: @@ -103,7 +109,7 @@ alluxio.user.file.copyfromlocal.block.location.policy.class: alluxio.user.file.create.ttl: 'Time to live for files created by a user, no ttl by default.' alluxio.user.file.create.ttl.action: - 'When file''s ttl is expired, the action performs on it. Options: DELETE (default) or FREE' + 'When file''s ttl is expired, the action performs on it. Options: DELETE_ALLUXIO(default), FREE or DELETE' alluxio.user.file.delete.unchecked: 'Whether to check if the UFS contents are in sync with Alluxio before attempting to delete persisted directories recursively.' alluxio.user.file.include.operation.id: @@ -154,6 +160,8 @@ alluxio.user.file.write.tier.default: 'The default tier for choosing a where to write a block. Valid option is any integer. 
Non-negative values identify tiers starting from top going down (0 identifies the first tier, 1 identifies the second tier, and so on). If the provided value is greater than the number of tiers, it identifies the last tier. Negative values identify tiers starting from the bottom going up (-1 identifies the last tier, -2 identifies the second to last tier, and so on). If the absolute value of the provided value is greater than the number of tiers, it identifies the first tier.' alluxio.user.file.writetype.default: 'Default write type when creating Alluxio files. Valid options are `MUST_CACHE` (write will only go to Alluxio and must be stored in Alluxio), `CACHE_THROUGH` (try to cache, write to UnderFS synchronously), `THROUGH` (no cache, write to UnderFS synchronously), `ASYNC_THROUGH` (write to cache, write to UnderFS asynchronously, replicated alluxio.user.file.replication.durable times in Alluxio before data is persisted.' +alluxio.user.hdfs.client.exclude.mount.info.on.list.status: + 'If enabled, the mount info will be excluded from the response when a HDFS client calls alluxio to list status on a directory.' alluxio.user.hostname: 'The hostname to use for an Alluxio client.' alluxio.user.local.reader.chunk.size.bytes: @@ -267,12 +275,12 @@ alluxio.user.ufs.block.location.all.fallback.enabled: alluxio.user.ufs.block.read.concurrency.max: 'The maximum concurrent readers for one UFS block on one Block Worker.' alluxio.user.ufs.block.read.location.policy: - 'When an Alluxio client reads a file from the UFS, it delegates the read to an Alluxio worker. The client uses this policy to choose which worker to read through. 
Built-in choices: [<a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/DeterministicHashPolicy.html">alluxio.client.block.policy.DeterministicHashPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/LocalFirstAvoidEvictionPolicy.html">alluxio.client.block.policy.LocalFirstAvoidEvictionPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/LocalFirstPolicy.html">alluxio.client.block.policy.LocalFirstPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/MostAvailableFirstPolicy.html">alluxio.client.block.policy.MostAvailableFirstPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/RoundRobinPolicy.html">alluxio.client.block.policy.RoundRobinPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/SpecificHostPolicy.html">alluxio.client.block.policy.SpecificHostPolicy</a>].' + 'When an Alluxio client reads a file from the UFS, it delegates the read to an Alluxio worker. The client uses this policy to choose which worker to read through. 
Built-in choices: [<a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/CapacityBasedDeterministicHashPolicy.html">alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/CapacityBaseRandomPolicy.html">alluxio.client.block.policy.CapacityBaseRandomPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/DeterministicHashPolicy.html">alluxio.client.block.policy.DeterministicHashPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/LocalFirstAvoidEvictionPolicy.html">alluxio.client.block.policy.LocalFirstAvoidEvictionPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/LocalFirstPolicy.html">alluxio.client.block.policy.LocalFirstPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/MostAvailableFirstPolicy.html">alluxio.client.block.policy.MostAvailableFirstPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/RoundRobinPolicy.html">alluxio.client.block.policy.RoundRobinPolicy</a>, <a href="https://docs.alluxio.io/os/javadoc/edge/alluxio/client/block/policy/SpecificHostPolicy.html">alluxio.client.block.policy.SpecificHostPolicy</a>].' alluxio.user.ufs.block.read.location.policy.cache.expiration.time: 'Deprecated - When alluxio.user.ufs.block.read.location.policy is set to alluxio.client.block.policy.CapacityBaseRandomPolicy, this specifies cache expire time of block location.' alluxio.user.ufs.block.read.location.policy.cache.size: 'Deprecated - When alluxio.user.ufs.block.read.location.policy is set to alluxio.client.block.policy.CapacityBaseRandomPolicy, this specifies cache size of block location.' 
alluxio.user.ufs.block.read.location.policy.deterministic.hash.shards: - 'When alluxio.user.ufs.block.read.location.policy is set to alluxio.client.block.policy.DeterministicHashPolicy, this specifies the number of hash shards.' + 'When alluxio.user.ufs.block.read.location.policy is set to alluxio.client.block.policy.DeterministicHashPolicy or alluxio.client.block.policy.CapacityBasedDeterministicHashPolicy, this specifies the number of hash shards.' alluxio.user.worker.list.refresh.interval: 'The interval used to refresh the live worker list on the client' diff --git a/docs/_data/table/en/worker-configuration.yml b/docs/_data/table/en/worker-configuration.yml index 7671d5ace9ed..87bc0d93a5e1 100644 --- a/docs/_data/table/en/worker-configuration.yml +++ b/docs/_data/table/en/worker-configuration.yml @@ -10,6 +10,8 @@ alluxio.worker.block.annotator.lrfu.step.factor: 'A factor in [0, 1] to control the behavior of LRFU: smaller value makes LRFU more similar to LFU; and larger value makes LRFU closer to LRU.' alluxio.worker.block.heartbeat.interval: 'The interval between block workers'' heartbeats to update block status, storage health and other workers'' information to Alluxio Master.' +alluxio.worker.block.heartbeat.report.size.threshold: + 'When alluxio.worker.register.to.all.masters=true, because a worker will send block reports to all masters, we use a threshold to limit the unsent block report size in worker''s memory. If the worker block heartbeat is larger than the threshold, we discard the heartbeat message and force the worker to register with that master with a full report.' alluxio.worker.block.heartbeat.timeout: 'The timeout value of block workers'' heartbeats. If the worker can''t connect to master before this interval expires, the worker will exit.' 
alluxio.worker.block.master.client.pool.size: @@ -164,6 +166,8 @@ alluxio.worker.register.stream.enabled: 'When the worker registers with the master, whether the request should be broken into a stream of smaller batches. This is useful when the worker''s storage is large and we expect a large number of blocks. ' alluxio.worker.register.stream.response.timeout: 'When the worker registers the master with streaming, the worker will be sending messages to the master during the streaming.During an active stream if the master have not responded to the worker for more than this timeout, the worker will consider the master is hanging and close the stream.' +alluxio.worker.register.to.all.masters: + 'If enabled, workers will register themselves to all masters, instead of primary master only. This can be used to save the master failover time because the new primary immediately knows all existing workers and blocks. Can only be enabled when alluxio.standby.master.grpc.enabled is turned on.' alluxio.worker.remote.io.slow.threshold: 'The time threshold for when a worker remote IO (read or write) of a single buffer is considered slow. When slow IO occurs, it is logged by a sampling logger.' alluxio.worker.reviewer.class: diff --git a/docs/_data/table/en/worker-metrics.yml b/docs/_data/table/en/worker-metrics.yml index a0572b5e2afe..21294a5f60c9 100644 --- a/docs/_data/table/en/worker-metrics.yml +++ b/docs/_data/table/en/worker-metrics.yml @@ -122,6 +122,8 @@ Worker.CapacityTotal: 'Total capacity (in bytes) on all tiers of a specific Alluxio worker' Worker.CapacityUsed: 'Total used bytes on all tiers of a specific Alluxio worker' +Worker.MasterRegistrationSuccessCount: + 'Total number of the succeed master registration.' Worker.RpcQueueLength: 'Length of the worker rpc queue. Use this metric to monitor the RPC pressure on worker.' 
Worker.RpcThreadActiveCount: diff --git a/docs/_data/table/master-configuration.csv b/docs/_data/table/master-configuration.csv index c9c4a5ea311c..a4d5ea6bef82 100644 --- a/docs/_data/table/master-configuration.csv +++ b/docs/_data/table/master-configuration.csv @@ -120,7 +120,7 @@ alluxio.master.metastore.rocks.block.meta.block.index,"" alluxio.master.metastore.rocks.block.meta.bloom.filter,"false" alluxio.master.metastore.rocks.block.meta.cache.size,"" alluxio.master.metastore.rocks.block.meta.index,"" -alluxio.master.metastore.rocks.checkpoint.compression.level,"-1" +alluxio.master.metastore.rocks.checkpoint.compression.level,"1" alluxio.master.metastore.rocks.edge.block.index,"" alluxio.master.metastore.rocks.edge.bloom.filter,"false" alluxio.master.metastore.rocks.edge.cache.size,"" diff --git a/docs/_data/table/master-metrics.csv b/docs/_data/table/master-metrics.csv index 194dc2ff18c0..adb6a3876f55 100644 --- a/docs/_data/table/master-metrics.csv +++ b/docs/_data/table/master-metrics.csv @@ -81,6 +81,8 @@ Master.LastBackupEntriesCount,GAUGE Master.LastBackupRestoreCount,GAUGE Master.LastBackupRestoreTimeMs,GAUGE Master.LastBackupTimeMs,GAUGE +Master.LastGainPrimacyTime,GAUGE +Master.LastLosePrimacyTime,GAUGE Master.ListingCacheEvictions,COUNTER Master.ListingCacheHits,COUNTER Master.ListingCacheLoadTimes,COUNTER @@ -123,6 +125,7 @@ Master.PathsRenamed,COUNTER Master.PathsUnmounted,COUNTER Master.RenamePathOps,COUNTER Master.ReplicaMgmtActiveJobSize,GAUGE +Master.ReplicationLimitedFiles,COUNTER Master.RocksBlockBackgroundErrors,GAUGE Master.RocksBlockBlockCacheCapacity,GAUGE Master.RocksBlockBlockCachePinnedUsage,GAUGE @@ -176,6 +179,9 @@ Master.RpcThreadActiveCount,GAUGE Master.RpcThreadCurrentCount,GAUGE Master.SetAclOps,COUNTER Master.SetAttributeOps,COUNTER +Master.StartTime,GAUGE +Master.TTLBuckets,GAUGE +Master.TTLInodes,GAUGE Master.ToRemoveBlockCount,GAUGE Master.TotalPaths,GAUGE Master.TotalRpcs,TIMER diff --git 
a/docs/_data/table/user-configuration.csv b/docs/_data/table/user-configuration.csv index 5688d89b252f..1c407ed1ddf1 100644 --- a/docs/_data/table/user-configuration.csv +++ b/docs/_data/table/user-configuration.csv @@ -44,6 +44,9 @@ alluxio.user.client.cache.store.overhead,"" alluxio.user.client.cache.store.type,"LOCAL" alluxio.user.client.cache.timeout.duration,"-1" alluxio.user.client.cache.timeout.threads,"32" +alluxio.user.client.cache.ttl.check.interval.seconds,"3600" +alluxio.user.client.cache.ttl.enabled,"false" +alluxio.user.client.cache.ttl.threshold.seconds,"10800" alluxio.user.client.report.version.enabled,"false" alluxio.user.conf.cluster.default.enabled,"true" alluxio.user.conf.sync.interval,"1min" @@ -51,7 +54,7 @@ alluxio.user.date.format.pattern,"MM-dd-yyyy HH:mm:ss:SSS" alluxio.user.file.buffer.bytes,"8MB" alluxio.user.file.copyfromlocal.block.location.policy.class,"alluxio.client.block.policy.RoundRobinPolicy" alluxio.user.file.create.ttl,"-1" -alluxio.user.file.create.ttl.action,"DELETE" +alluxio.user.file.create.ttl.action,"DELETE_ALLUXIO" alluxio.user.file.delete.unchecked,"false" alluxio.user.file.include.operation.id,"true" alluxio.user.file.master.client.pool.gc.interval,"120sec" @@ -77,6 +80,7 @@ alluxio.user.file.write.init.sleep.max,"5sec" alluxio.user.file.write.init.sleep.min,"1sec" alluxio.user.file.write.tier.default,"0" alluxio.user.file.writetype.default,"ASYNC_THROUGH" +alluxio.user.hdfs.client.exclude.mount.info.on.list.status,"false" alluxio.user.hostname,"" alluxio.user.local.reader.chunk.size.bytes,"8MB" alluxio.user.local.writer.chunk.size.bytes,"64KB" diff --git a/docs/_data/table/worker-configuration.csv b/docs/_data/table/worker-configuration.csv index ca0f152f003f..4d1a0745a128 100644 --- a/docs/_data/table/worker-configuration.csv +++ b/docs/_data/table/worker-configuration.csv @@ -5,6 +5,7 @@ alluxio.worker.block.annotator.class,"alluxio.worker.block.annotator.LRUAnnotato 
alluxio.worker.block.annotator.lrfu.attenuation.factor,"2.0" alluxio.worker.block.annotator.lrfu.step.factor,"0.25" alluxio.worker.block.heartbeat.interval,"1sec" +alluxio.worker.block.heartbeat.report.size.threshold,"1000000" alluxio.worker.block.heartbeat.timeout,"${alluxio.worker.master.connect.retry.timeout}" alluxio.worker.block.master.client.pool.size,"11" alluxio.worker.block.store.type,"FILE" @@ -82,6 +83,7 @@ alluxio.worker.register.stream.complete.timeout,"5min" alluxio.worker.register.stream.deadline,"15min" alluxio.worker.register.stream.enabled,"true" alluxio.worker.register.stream.response.timeout,"${alluxio.master.worker.register.stream.response.timeout}" +alluxio.worker.register.to.all.masters,"false" alluxio.worker.remote.io.slow.threshold,"10s" alluxio.worker.reviewer.class,"alluxio.worker.block.reviewer.ProbabilisticBufferReviewer" alluxio.worker.reviewer.probabilistic.hardlimit.bytes,"64MB" diff --git a/docs/_data/table/worker-metrics.csv b/docs/_data/table/worker-metrics.csv index e1a500d79909..63cc3af3c342 100644 --- a/docs/_data/table/worker-metrics.csv +++ b/docs/_data/table/worker-metrics.csv @@ -61,6 +61,7 @@ Worker.CacheUfsBlocks,COUNTER Worker.CapacityFree,GAUGE Worker.CapacityTotal,GAUGE Worker.CapacityUsed,GAUGE +Worker.MasterRegistrationSuccessCount,COUNTER Worker.RpcQueueLength,GAUGE Worker.RpcThreadActiveCount,GAUGE Worker.RpcThreadCurrentCount,GAUGE From 163b3ff50d56475b5b69fa9fd54e82df50be7357 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 2 Mar 2023 13:30:50 +0800 Subject: [PATCH 153/334] Add a property to disable file access time ### What changes are proposed in this pull request? Many people do not use file access time. This PR provides an option to disable it. ### Why are the changes needed? To improve the performance ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#16981 change-id: cid-acde5a7a54861e466a7a615c864600fa23470eb2 --- .../main/java/alluxio/conf/PropertyKey.java | 13 +++++++++++++ .../master/file/DefaultFileSystemMaster.java | 19 ++++++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index caf467b07713..d51f0e6a32f9 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -2447,6 +2447,17 @@ public String toString() { + "the master addresses.") .setScope(Scope.ALL) .build(); + + public static final PropertyKey MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = + booleanBuilder(Name.MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED) + .setDefaultValue(true) + .setDescription("If enabled, file access time updater will update the file last " + + "access time when an inode is accessed. This property can be turned off to improve " + + "performance and reduce the number of journal entries if your application does " + + "not rely on the file access time metadata.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL = durationBuilder(Name.MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL) .setDefaultValue("1h") @@ -7828,6 +7839,8 @@ public static final class Name { "alluxio.master.cluster.metrics.update.interval"; public static final String MASTER_CONTAINER_ID_RESERVATION_SIZE = "alluxio.master.container.id.reservation.size"; + public static final String MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = + "alluxio.master.file.access.time.updater.enabled"; public static final String MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL = "alluxio.master.file.access.time.journal.flush.interval"; public static final String MASTER_FILE_ACCESS_TIME_UPDATE_PRECISION = diff --git 
a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index b67d1e000ffd..12f0a3d6f64a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -399,7 +399,7 @@ public class DefaultFileSystemMaster extends CoreMaster /** Stores the time series for various metrics which are exposed in the UI. */ private final TimeSeriesStore mTimeSeriesStore; - private final AccessTimeUpdater mAccessTimeUpdater; + @Nullable private final AccessTimeUpdater mAccessTimeUpdater; /** Used to check pending/running backup from RPCs. */ protected final CallTracker mStateLockCallTracker; @@ -502,7 +502,10 @@ public Type getType() { mUfsBlockLocationCache = UfsBlockLocationCache.Factory.create(mMountTable); mSyncManager = new ActiveSyncManager(mMountTable, this); mTimeSeriesStore = new TimeSeriesStore(); - mAccessTimeUpdater = new AccessTimeUpdater(this, mInodeTree, masterContext.getJournalSystem()); + mAccessTimeUpdater = + Configuration.getBoolean(PropertyKey.MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED) + ? 
new AccessTimeUpdater( + this, mInodeTree, masterContext.getJournalSystem()) : null; // Sync executors should allow core threads to time out mSyncPrefetchExecutor.allowCoreThreadTimeOut(true); mSyncMetadataExecutor.allowCoreThreadTimeOut(true); @@ -748,7 +751,9 @@ public void start(Boolean isPrimary) throws IOException { () -> Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL), Configuration.global(), mMasterContext.getUserState())); } - mAccessTimeUpdater.start(); + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.start(); + } mSyncManager.start(); mLoadManager.start(); } @@ -762,7 +767,9 @@ public void stop() throws IOException { mAsyncAuditLogWriter = null; } mSyncManager.stop(); - mAccessTimeUpdater.stop(); + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.stop(); + } mLoadManager.stop(); super.stop(); } @@ -5338,7 +5345,9 @@ protected LockingScheme createSyncLockingScheme(AlluxioURI path, } protected void updateAccessTime(RpcContext rpcContext, Inode inode, long opTimeMs) { - mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, opTimeMs); + if (mAccessTimeUpdater != null) { + mAccessTimeUpdater.updateAccessTime(rpcContext.getJournalContext(), inode, opTimeMs); + } } boolean isAclEnabled() { From df0ae1e5b3ef03095388466c4580849a4d9ee744 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 2 Mar 2023 13:31:09 +0800 Subject: [PATCH 154/334] Enhance the capacity report to show worker registration info ### What changes are proposed in this pull request? Screen Shot 2023-02-21 at 10 02 05 PM Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. 
addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16927 change-id: cid-7d95b2513ff7e61b708f8c51a6708f0d80b6195c --- .../client/block/AllMastersWorkerInfo.java | 88 +++++++++++ .../client/block/util/WorkerInfoUtil.java | 146 ++++++++++++++++++ .../master/block/DefaultBlockMaster.java | 17 +- .../cli/fsadmin/report/CapacityCommand.java | 109 ++++++++++++- .../fsadmin/report/CapacityCommandTest.java | 2 + .../CapacityCommandIntegrationTest.java | 2 + 6 files changed, 354 insertions(+), 10 deletions(-) create mode 100644 core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java create mode 100644 core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java b/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java new file mode 100644 index 000000000000..d858f29276eb --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/AllMastersWorkerInfo.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block; + +import alluxio.collections.Pair; +import alluxio.wire.WorkerInfo; + +import java.net.InetSocketAddress; +import java.util.List; +import java.util.Map; + +/** + * A data class to persist aggregated worker info from all masters, including standby masters. + * Used when worker all master registration feature is enabled. 
+ */ +public class AllMastersWorkerInfo { + private final Map mWorkerIdAddressMap; + private final List mMasterAddresses; + private final InetSocketAddress mPrimaryMasterAddress; + private final List mPrimaryMasterWorkerInfo; + private final Map>> mWorkerIdInfoMap; + + /** + * @param workerIdAddressMap the worker id to address map + * @param masterAddresses the master addresses + * @param primaryMasterAddress the primary master address + * @param primaryMasterWorkerInfo the worker info of the primary master + * @param workerIdInfoMap the worker id to worker info map + */ + public AllMastersWorkerInfo( + Map workerIdAddressMap, + List masterAddresses, + InetSocketAddress primaryMasterAddress, + List primaryMasterWorkerInfo, + Map>> workerIdInfoMap) { + mWorkerIdAddressMap = workerIdAddressMap; + mMasterAddresses = masterAddresses; + mPrimaryMasterAddress = primaryMasterAddress; + mPrimaryMasterWorkerInfo = primaryMasterWorkerInfo; + mWorkerIdInfoMap = workerIdInfoMap; + } + + /** + * @return the worker id to worker address map + */ + public Map getWorkerIdAddressMap() { + return mWorkerIdAddressMap; + } + + /** + * @return the master addresses for all masters + */ + public List getMasterAddresses() { + return mMasterAddresses; + } + + /** + * @return the primary master address + */ + public InetSocketAddress getPrimaryMasterAddress() { + return mPrimaryMasterAddress; + } + + /** + * @return the worker info for all workers from the primary master + */ + public List getPrimaryMasterWorkerInfo() { + return mPrimaryMasterWorkerInfo; + } + + /** + * @return a map which keys are the worker ids and values are lists of pairs, + * the first element in the pair is the master address and the second element is + * the worker info for such worker id gotten from the master with such master address. 
+ */ + public Map>> getWorkerIdInfoMap() { + return mWorkerIdInfoMap; + } +} diff --git a/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java b/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java new file mode 100644 index 000000000000..c8e489d2bc1c --- /dev/null +++ b/core/client/fs/src/main/java/alluxio/client/block/util/WorkerInfoUtil.java @@ -0,0 +1,146 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.block.util; + +import alluxio.ClientContext; +import alluxio.annotation.SuppressFBWarnings; +import alluxio.client.block.AllMastersWorkerInfo; +import alluxio.client.block.BlockMasterClient; +import alluxio.client.block.RetryHandlingBlockMasterClient; +import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.collections.Pair; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.MasterClientContext; +import alluxio.retry.TimeoutRetry; +import alluxio.util.ConfigurationUtils; +import alluxio.wire.WorkerInfo; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * The util class for getting the worker info. 
+ */ +public class WorkerInfoUtil { + private static final Logger LOG = LoggerFactory.getLogger(WorkerInfoUtil.class); + private static final int RETRY_TIMEOUT = 5000; + private static final int RETRY_INTERVAL = 500; + + /** + * Get worker reports from all masters, including standby masters. + * Can only be called when worker all master registration feature is enabled. + * + * @param configuration the cluster configuration + * @param primaryMasterClient the block master client connecting to the primary + * @param options the options to make the GetWorkerReport rpc + * @return the aggregated worker info + */ + public static AllMastersWorkerInfo getWorkerReportsFromAllMasters( + AlluxioConfiguration configuration, + BlockMasterClient primaryMasterClient, + GetWorkerReportOptions options) throws IOException { + Preconditions.checkState( + configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS), + "GetWorkerReportsFromAllMasters is used to collect worker info from " + + "all masters, including standby masters. 
" + + "This method requires worker all master registration to be enabled."); + + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.ID)); + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.STATE)); + Preconditions.checkState( + options.getFieldRange().contains(GetWorkerReportOptions.WorkerInfoField.ADDRESS)); + + ClientContext clientContext = ClientContext.create(Configuration.global()); + MasterClientContext masterContext = MasterClientContext.newBuilder(clientContext).build(); + + Preconditions.checkState( + primaryMasterClient.getRemoteSockAddress() instanceof InetSocketAddress); + InetSocketAddress primaryMasterAddress = + (InetSocketAddress) primaryMasterClient.getRemoteSockAddress(); + List masterAddresses = + ConfigurationUtils.getMasterRpcAddresses(configuration); + Preconditions.checkState(masterAddresses.contains(primaryMasterAddress)); + + Map> masterAddressToWorkerInfoMap = new HashMap<>(); + for (InetSocketAddress masterAddress : masterAddresses) { + try (BlockMasterClient client = new RetryHandlingBlockMasterClient( + masterContext, masterAddress, () -> new TimeoutRetry(RETRY_TIMEOUT, RETRY_INTERVAL))) { + List workerInfos = client.getWorkerReport(options); + masterAddressToWorkerInfoMap.put(masterAddress, workerInfos); + } catch (Exception e) { + if (masterAddress.equals(primaryMasterAddress)) { + LOG.error("Failed to get worker report from master: {}", masterContext, e); + throw e; + } + LOG.warn("Failed to get worker report from master: {}", masterContext, e); + } + } + return populateAllMastersWorkerInfo(primaryMasterAddress, masterAddressToWorkerInfoMap); + } + + @VisibleForTesting + @SuppressFBWarnings("WMI_WRONG_MAP_ITERATOR") + static AllMastersWorkerInfo populateAllMastersWorkerInfo( + InetSocketAddress primaryMasterAddress, + Map> masterAddressToWorkerInfoMap) { + Map>> workerIdInfoMap = new HashMap<>(); + Map workerIdAddressMap = new 
HashMap<>(); + List workerInfosFromPrimaryMaster = null; + + for (InetSocketAddress masterAddress : masterAddressToWorkerInfoMap.keySet()) { + List workerInfo = masterAddressToWorkerInfoMap.get(masterAddress); + if (masterAddress.equals(primaryMasterAddress)) { + workerInfosFromPrimaryMaster = workerInfo; + } + for (WorkerInfo info : workerInfo) { + workerIdInfoMap.compute(info.getId(), (k, v) -> { + if (v == null) { + v = new ArrayList<>(); + } + v.add(new Pair<>(masterAddress, info)); + return v; + }); + workerIdAddressMap.compute(info.getId(), (k, v) -> { + InetSocketAddress workerAddress = + InetSocketAddress.createUnresolved(info.getAddress().getHost(), + info.getAddress().getRpcPort()); + if (v == null) { + return workerAddress; + } + if (!v.equals(workerAddress)) { + throw new RuntimeException(String.format( + "The same worker id %d corresponds to multiple worker name %s %s", + k, v, workerAddress)); + } + return v; + }); + } + } + return new AllMastersWorkerInfo(workerIdAddressMap, + new ArrayList<>(masterAddressToWorkerInfoMap.keySet()), + primaryMasterAddress, + workerInfosFromPrimaryMaster, workerIdInfoMap); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index d6f093f5f5fa..e00f8685443d 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -362,12 +362,7 @@ public Map getServices() { @Override public Map getStandbyServices() { - Map services = new HashMap<>(); - services.put(ServiceType.BLOCK_MASTER_WORKER_SERVICE, - new GrpcService(ServerInterceptors - .intercept(new BlockMasterWorkerServiceHandler(this), - new ClientContextServerInjector()))); - return services; + return getServices(); } @Override @@ -655,6 +650,10 @@ public List getLostWorkersInfoList() throws UnavailableException { @Override public 
void removeDecommissionedWorker(long workerId) throws NotFoundException { + if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { + throw new UnavailableRuntimeException( + "RemoveDecommissionedWorker operation is not supported on standby masters"); + } MasterWorkerInfo worker = getWorker(workerId); Preconditions.checkNotNull(mDecommissionedWorkers .getFirstByField(ADDRESS_INDEX, worker.getWorkerAddress())); @@ -1134,7 +1133,7 @@ protected MasterWorkerInfo recordWorkerRegistration(long workerId) { @Override public long getWorkerId(WorkerNetAddress workerNetAddress) { - if (mStandbyMasterRpcEnabled && mPrimarySelector.getState() == NodeState.STANDBY) { + if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { throw new UnavailableRuntimeException( "GetWorkerId operation is not supported on standby masters"); } @@ -1393,7 +1392,7 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT // by the LostWorkerDetectionHeartbeatExecutor worker.updateLastUpdatedTimeMs(); - if (mWorkerRegisterToAllMasters && mPrimarySelector.getState() == NodeState.STANDBY) { + if (mWorkerRegisterToAllMasters && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { waitBlockIdPresent( addedBlocks.values().stream().flatMap(Collection::stream) .collect(Collectors.toList()), workerId); @@ -1419,7 +1418,7 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT processWorkerRemovedBlocks(worker, removedBlockIds, false); processWorkerAddedBlocks(worker, addedBlocks); Set toRemoveBlocks = worker.getToRemoveBlocks(); - if (toRemoveBlocks.isEmpty() || mPrimarySelector.getState() == NodeState.STANDBY) { + if (toRemoveBlocks.isEmpty() || mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { workerCommand = Command.newBuilder().setCommandType(CommandType.Nothing).build(); } else { workerCommand = Command.newBuilder().setCommandType(CommandType.Free) diff --git 
a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java index 3caac3c2a36e..1d5699acbf41 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java @@ -13,12 +13,16 @@ import alluxio.cli.fsadmin.FileSystemAdminShellUtils; import alluxio.cli.fsadmin.command.ReportCommand; +import alluxio.client.block.AllMastersWorkerInfo; import alluxio.client.block.BlockMasterClient; import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.client.block.options.GetWorkerReportOptions.WorkerInfoField; import alluxio.client.block.options.GetWorkerReportOptions.WorkerRange; +import alluxio.client.block.util.WorkerInfoUtil; +import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.Scope; import alluxio.util.FormatUtils; @@ -29,7 +33,9 @@ import java.io.IOException; import java.io.PrintStream; +import java.net.InetSocketAddress; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; @@ -57,6 +63,9 @@ public class CapacityCommand { private TreeMap> mCapacityTierInfoMap; private Map> mUsedTierInfoMap; + private static final String LIVE_WORKER_STATE = "In Service"; + private static final String LOST_WORKER_STATE = "Out of Service"; + /** * Creates a new instance of {@link CapacityCommand}. 
* @@ -94,7 +103,20 @@ public int run(CommandLine cl) throws IOException { */ public void generateCapacityReport(GetWorkerReportOptions options, AlluxioConfiguration conf) throws IOException { - List workerInfoList = mBlockMasterClient.getWorkerReport(options); + boolean workerRegisterToAllMasters = + conf.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS); + + final List workerInfoList; + final AllMastersWorkerInfo allMastersWorkerInfo; + if (workerRegisterToAllMasters) { + allMastersWorkerInfo = + WorkerInfoUtil.getWorkerReportsFromAllMasters( + conf, mBlockMasterClient, options); + workerInfoList = allMastersWorkerInfo.getPrimaryMasterWorkerInfo(); + } else { + allMastersWorkerInfo = null; + workerInfoList = mBlockMasterClient.getWorkerReport(options); + } if (workerInfoList.size() == 0) { print("No workers found."); return; @@ -104,6 +126,9 @@ public void generateCapacityReport(GetWorkerReportOptions options, AlluxioConfig collectWorkerInfo(workerInfoList); printAggregatedInfo(options); printWorkerInfo(workerInfoList); + if (workerRegisterToAllMasters) { + printWorkerAllMasterConnectionInfo(allMastersWorkerInfo); + } } /** @@ -184,6 +209,88 @@ private void printAggregatedInfo(GetWorkerReportOptions options) { } } + private String getMasterAddressesString(Set addresses) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + List addressStrings = + addresses.stream().map(it -> masterAddressToString(it, addresses)).sorted().collect( + Collectors.toList()); + for (int i = 0; i < addressStrings.size(); ++i) { + sb.append(addressStrings.get(i)); + if (i != addressStrings.size() - 1) { + sb.append(","); + } + } + sb.append("]"); + return sb.toString(); + } + + private String masterAddressToString( + InetSocketAddress inetSocketAddress, + Collection masterAddresses) { + // If multiple masters share the same host name, we will display the host name + port + // otherwise just the host name. 
+ if (inetSocketAddress.getHostName().equals("localhost") || masterAddresses.stream() + .filter(it -> it.getHostName().equals(inetSocketAddress.getHostName())).count() > 1) { + return inetSocketAddress.toString(); + } + return inetSocketAddress.getHostName(); + } + + private void printWorkerAllMasterConnectionInfo( + AllMastersWorkerInfo allMastersWorkerInfo) { + List masterAddresses = allMastersWorkerInfo.getMasterAddresses(); + int maxWorkerNameLength = + allMastersWorkerInfo.getWorkerIdAddressMap().values().stream() + .map(w -> w.getHostName().length()) + .max(Comparator.comparing(Integer::intValue)).orElse(0); + + int workerNameIndent = 16; + if (workerNameIndent <= maxWorkerNameLength) { + // extend first indent according to the longest worker name by default 5 + workerNameIndent = maxWorkerNameLength + 5; + } + + // Create indentation to tolerate 2 unregistered masters + int maxMasterNameLength = + allMastersWorkerInfo.getWorkerIdAddressMap().values().stream() + .map(w -> masterAddressToString(w, masterAddresses).length()) + .max(Comparator.comparing(Integer::intValue)).orElse(0); + int unregisteredMasterNameIndent = Math.max(24, maxMasterNameLength * 2 + 10); + String format = "%-" + workerNameIndent + + "s %-" + unregisteredMasterNameIndent + "s %-" + unregisteredMasterNameIndent + "s %s"; + print(""); + print(String.format(format, "Worker Name", "Not Registered With", "Lost", "In Service")); + for (Map.Entry>> workerInfoEntry : + allMastersWorkerInfo.getWorkerIdInfoMap().entrySet()) { + if (workerInfoEntry.getValue().stream() + .noneMatch(it -> it.getSecond().getState().equals("In Service"))) { + // Don't display the worker if it has been removed from all masters. + continue; + } + long workerId = workerInfoEntry.getKey(); + InetSocketAddress workerAddress = allMastersWorkerInfo.getWorkerIdAddressMap() + .get(workerId); + String workerName = workerAddress != null ? 
workerAddress.getHostName() + : "(UNKNOWN, id = " + workerId + ")"; + Set inServiceMasters = + workerInfoEntry.getValue().stream() + .filter(it -> it.getSecond().getState().equals(LIVE_WORKER_STATE)) + .map(alluxio.collections.Pair::getFirst).collect(Collectors.toSet()); + Set lostMasters = + workerInfoEntry.getValue().stream() + .filter(it -> it.getSecond().getState().equals(LOST_WORKER_STATE)) + .map(alluxio.collections.Pair::getFirst).collect(Collectors.toSet()); + Set allMasterAddresses = + new HashSet<>(allMastersWorkerInfo.getMasterAddresses()); + Set notRegisteredMaster = + com.google.common.collect.Sets.difference(allMasterAddresses, + com.google.common.collect.Sets.union(inServiceMasters, lostMasters)); + print(String.format(format, workerName, getMasterAddressesString(notRegisteredMaster), + getMasterAddressesString(lostMasters), getMasterAddressesString(inServiceMasters))); + } + } + /** * Prints worker capacity information. * diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java index 4a8d5521194e..129fc67a0095 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java @@ -301,4 +301,6 @@ private List prepareLongWorkerNameInfoList() { infoList.add(secondInfo); return infoList; } + + // TODO(elega) Add unit tests for the case where worker all master registration is enabled } diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java index bdbfb93c02d0..2096f672d062 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java @@ -80,4 +80,6 @@ public void tooManyOptions() { + 
"\nToo many arguments passed in.\n"; Assert.assertEquals(expected, mOutput.toString()); } + + // TODO(elega) Add unit tests for the case where worker all master registration is enabled } From 49a0a556b99a9750485e8ccacf698aae6550b625 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Thu, 2 Mar 2023 13:33:12 +0800 Subject: [PATCH 155/334] [DOCFIX] Correct docs about Ratis ### What changes are proposed in this pull request? Correct docs about Ratis. ### Why are the changes needed? Embedded journal is introduced in #8219, then Copycat is replaced by Ratis in #12181. Some docs are not updated. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#16985 change-id: cid-592a5c06991c5b067690031ef7fffed9d2e6fac6 --- .../java/alluxio/master/journal/raft/RaftJournalSystem.java | 2 +- docs/cn/deploy/Running-Alluxio-On-a-HA-Cluster.md | 2 +- docs/en/deploy/Running-Alluxio-On-a-HA-Cluster.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java index a0ecfd8f2b3d..2fffe155726e 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java @@ -164,7 +164,7 @@ * so we allow a snapshot to be taken once a day at a user-configured time. To support this, * all state changes must first acquire a read lock, and snapshotting requires the * corresponding write lock. Once we have the write lock for all state machines, we enable - * snapshots in Copycat through our AtomicBoolean, then wait for any snapshot to complete. + * snapshots in Ratis through our AtomicBoolean, then wait for any snapshot to complete. 
*/ @ThreadSafe public class RaftJournalSystem extends AbstractJournalSystem { diff --git a/docs/cn/deploy/Running-Alluxio-On-a-HA-Cluster.md b/docs/cn/deploy/Running-Alluxio-On-a-HA-Cluster.md index f5b0f6578bdb..887b31597b56 100644 --- a/docs/cn/deploy/Running-Alluxio-On-a-HA-Cluster.md +++ b/docs/cn/deploy/Running-Alluxio-On-a-HA-Cluster.md @@ -47,7 +47,7 @@ alluxio.master.embedded.journal.addresses= - 第二个属性 `alluxio.master.mount.table.root.ufs=` 设置为挂载到Alluxio根目录的底层存储URI。 一定保证master节点和所有worker节点都可以访问此共享存储。 示例包括`alluxio.master.mount.table.root.ufs=hdfs://1.2.3.4:9000/alluxio/root/`或`alluxio.master.mount.table.root.ufs=s3://bucket/dir/` 。 - 第三个属性 `alluxio.master.embedded.journal.addresses` 设置参加Alluxio leading master选举的master节点集。默认的嵌入式日志端口是 `19200`。例如: `alluxio.master.embedded.journal.addresses=master_hostname_1:19200,master_hostname_2:19200,master_hostname_3:19200` -嵌入式日记特性依赖于 [Copycat](https://github.com/atomix/copycat) 内置leader选举功能。内置leader选举功能不能与Zookeeper一起使用,因为系统不能出现多种leader选举机制选出不同leader的情况。启用嵌入式日记就启用了Alluxio的内置leader election机制。请参阅[嵌入式日志配置文档]({{ '/en/operation/Journal.html' | relativize_url}}#embedded-journal-configuration),以了解更多详细信息以及使用内部leader选举配置HA集群的替代方案。 +嵌入式日记特性依赖于 [Ratis](https://github.com/apache/ratis) 内置leader选举功能。内置leader选举功能不能与Zookeeper一起使用,因为系统不能出现多种leader选举机制选出不同leader的情况。启用嵌入式日记就启用了Alluxio的内置leader election机制。请参阅[嵌入式日志配置文档]({{ '/en/operation/Journal.html' | relativize_url}}#embedded-journal-configuration),以了解更多详细信息以及使用内部leader选举配置HA集群的替代方案。 ### 选项2:Zookeeper和共享日志存储 diff --git a/docs/en/deploy/Running-Alluxio-On-a-HA-Cluster.md b/docs/en/deploy/Running-Alluxio-On-a-HA-Cluster.md index b350789e76cb..c61945835cea 100644 --- a/docs/en/deploy/Running-Alluxio-On-a-HA-Cluster.md +++ b/docs/en/deploy/Running-Alluxio-On-a-HA-Cluster.md @@ -78,7 +78,7 @@ Explanation: The default embedded journal port is `19200`. 
An example: `alluxio.master.embedded.journal.addresses=master_hostname_1:19200,master_hostname_2:19200,master_hostname_3:19200` -Note that embedded journal feature relies on [Ratis](https://github.com/apache/incubator-ratis) which uses +Note that embedded journal feature relies on [Ratis](https://github.com/apache/ratis) which uses leader election based on the Raft protocol and has its own format for storing journal entries. The built-in leader election cannot work with Zookeeper since the journal formats between these configurations may not match. From 1bf26fa05fb9c524f4d0396f93f62cdc4e03d82d Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Fri, 3 Mar 2023 05:44:45 +0800 Subject: [PATCH 156/334] Update ci version ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. https://github.com/Alluxio/alluxio/issues/16963 ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#16964 change-id: cid-8da7ea42471d3b615289397def114ab384440f2f --- .github/workflows/checkstyle.yml | 8 ++++---- .github/workflows/fuse_integration_tests.yml | 8 ++++---- .github/workflows/java8_integration_tests.yml | 8 ++++---- .github/workflows/java8_integration_tests_ft.yml | 8 ++++---- .github/workflows/java8_integration_tests_webui.yml | 8 ++++---- .github/workflows/java8_unit_tests.yml | 8 ++++---- .github/workflows/stale.yaml | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/checkstyle.yml b/.github/workflows/checkstyle.yml index 24ffe1695b53..2ce22c7c1f78 100644 --- a/.github/workflows/checkstyle.yml +++ b/.github/workflows/checkstyle.yml @@ -8,7 +8,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -16,7 +16,7 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} @@ -24,7 +24,7 @@ jobs: ${{ runner.os }}-maven- - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -41,7 +41,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/fuse_integration_tests.yml b/.github/workflows/fuse_integration_tests.yml index ea9422bf8ded..25631570da8e 100644 --- a/.github/workflows/fuse_integration_tests.yml +++ b/.github/workflows/fuse_integration_tests.yml @@ -18,7 +18,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -26,13 +26,13 @@ jobs: node-version: 
'10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java${{ matrix.java }}-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -50,7 +50,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests.yml b/.github/workflows/java8_integration_tests.yml index c10b5f648adc..454428cad849 100644 --- a/.github/workflows/java8_integration_tests.yml +++ b/.github/workflows/java8_integration_tests.yml @@ -31,7 +31,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -39,13 +39,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -63,7 +63,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests_ft.yml b/.github/workflows/java8_integration_tests_ft.yml index 1a22af092bbb..894854235706 100644 --- a/.github/workflows/java8_integration_tests_ft.yml +++ b/.github/workflows/java8_integration_tests_ft.yml @@ -21,7 +21,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: 
actions/setup-node@v1 @@ -29,13 +29,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -55,7 +55,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_integration_tests_webui.yml b/.github/workflows/java8_integration_tests_webui.yml index 85c4ba987062..cc82ee0b2a93 100644 --- a/.github/workflows/java8_integration_tests_webui.yml +++ b/.github/workflows/java8_integration_tests_webui.yml @@ -19,7 +19,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v1 @@ -27,13 +27,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -51,7 +51,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/java8_unit_tests.yml b/.github/workflows/java8_unit_tests.yml index 833688c22806..062ae2df3524 100644 --- a/.github/workflows/java8_unit_tests.yml +++ b/.github/workflows/java8_unit_tests.yml @@ -21,7 +21,7 @@ jobs: steps: - name: checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Use Node.js ${{ 
matrix.node-version }} uses: actions/setup-node@v1 @@ -29,13 +29,13 @@ jobs: node-version: '10.11.0' - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-java8-${{ hashFiles('**/pom.xml') }} - name: Cache local Go modules - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/go/pkg/mod key: ${{ runner.os }}-gomod-${{ hashFiles('**/go.mod') }} @@ -53,7 +53,7 @@ jobs: - name: Archive artifacts continue-on-error: true - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: always() with: name: artifact diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index 9e8fe31fadda..72ba9f046749 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -12,7 +12,7 @@ jobs: permissions: issues: write # for writing stale message pull-requests: write # for writing stale message - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 if: github.repository == 'alluxio/alluxio' steps: - uses: actions/stale@v6 From b70c79539b7942b1d1236ea9a53fb9c1235a566c Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Thu, 2 Mar 2023 15:09:17 -0800 Subject: [PATCH 157/334] Update helm chart ChangeLog and Chart Update the Change Log and Chart for this change: https://github.com/Alluxio/alluxio/pull/16973 pr-link: Alluxio/alluxio#16987 change-id: cid-5a2513f1512d23f3a62e5128fffc1bc59cf01d7f --- integration/kubernetes/helm-chart/alluxio/CHANGELOG.md | 3 +++ integration/kubernetes/helm-chart/alluxio/Chart.yaml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md b/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md index ef1bcbd863f1..c1dd1f7b6d25 100644 --- a/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md +++ b/integration/kubernetes/helm-chart/alluxio/CHANGELOG.md @@ -320,3 +320,6 @@ - Improve indentation in worker daemonset 
template - Configure ports in master service following values.yaml + +0.6.54 +- Update CSI driver and provisioner image diff --git a/integration/kubernetes/helm-chart/alluxio/Chart.yaml b/integration/kubernetes/helm-chart/alluxio/Chart.yaml index 28eeba30c3fd..5eb113d8268b 100644 --- a/integration/kubernetes/helm-chart/alluxio/Chart.yaml +++ b/integration/kubernetes/helm-chart/alluxio/Chart.yaml @@ -12,7 +12,7 @@ name: alluxio apiVersion: v1 description: Open source data orchestration for analytics and machine learning in any cloud. -version: 0.6.53 +version: 0.6.54 home: https://www.alluxio.io/ maintainers: - name: Adit Madan From 654969975d7d6cbef076f2a2f2f0ec4c8db8a782 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Fri, 3 Mar 2023 13:51:37 +0800 Subject: [PATCH 158/334] Improve state lock tracking and report on error ### What changes are proposed in this pull request? 1. Change tracking threads to keeping thread read lock holder count 2. Log jstack on state lock error so we can further check who are holding the state lock ### Why are the changes needed? Make debugging state lock issues slightly easier. ### Does this PR introduce any user facing changes? A chunky jstack output will appear in the master.log, which may look intimidating at first glance. This is good because I want people to notice and send the output to us. 
pr-link: Alluxio/alluxio#16984 change-id: cid-6c88ff9cf694cb0aee3fd09359d01f377dd017d6 --- .../main/java/alluxio/conf/PropertyKey.java | 10 ++++ .../java/alluxio/master/StateLockManager.java | 59 ++++++++++++++----- .../alluxio/master/StateLockManagerTest.java | 9 +-- .../master/file/DefaultFileSystemMaster.java | 2 +- .../alluxio/master/file/FileSystemMaster.java | 2 +- .../FileSystemMasterClientServiceHandler.java | 5 +- 6 files changed, 64 insertions(+), 23 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index d51f0e6a32f9..8f3474d2c074 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -3310,6 +3310,14 @@ public String toString() { + "if this property is true. This property is available since 1.7.1") .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_STATE_LOCK_ERROR_THRESHOLD = + intBuilder(Name.MASTER_STATE_LOCK_ERROR_THRESHOLD) + .setDefaultValue(20) + .setDescription("Used to trace and debug state lock issues. 
When a thread recursively " + + "acquires the state lock more than threshold, log an error for further debugging.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS = stringBuilder(Name.MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS) .setDefaultValue(Constants.MEDIUM_MEM) @@ -8101,6 +8109,8 @@ public static final class Name { "alluxio.master.skip.root.acl.check"; public static final String MASTER_STARTUP_BLOCK_INTEGRITY_CHECK_ENABLED = "alluxio.master.startup.block.integrity.check.enabled"; + public static final String MASTER_STATE_LOCK_ERROR_THRESHOLD = + "alluxio.master.state.lock.error.threshold"; public static final String MASTER_TIERED_STORE_GLOBAL_LEVEL0_ALIAS = "alluxio.master.tieredstore.global.level0.alias"; public static final String MASTER_TIERED_STORE_GLOBAL_LEVEL1_ALIAS = diff --git a/core/server/common/src/main/java/alluxio/master/StateLockManager.java b/core/server/common/src/main/java/alluxio/master/StateLockManager.java index 25baa80a74be..6c06f0af12c5 100644 --- a/core/server/common/src/main/java/alluxio/master/StateLockManager.java +++ b/core/server/common/src/main/java/alluxio/master/StateLockManager.java @@ -28,15 +28,20 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.Date; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ 
-59,9 +64,10 @@ public class StateLockManager { /** The state-lock. */ private ReentrantReadWriteLock mStateLock = new ReentrantReadWriteLock(true); - /** The set of threads that are waiting for or holding the state-lock in shared mode. */ private Set mSharedWaitersAndHolders; + /** Stores the name of each thread whos taking locks. */ + private Map mSharedLockHolders = new ConcurrentHashMap<>(); /** Scheduler that is used for interrupt-cycle. */ private ScheduledExecutorService mScheduler; @@ -77,6 +83,11 @@ public class StateLockManager { private ScheduledFuture mInterrupterFuture; /** Whether interrupt-cycle is entered. */ private AtomicBoolean mInterruptCycleTicking = new AtomicBoolean(false); + /** + * Logs when a thread acquires the shared state lock too many times, + * which indicates a deep recursion. + */ + private int mLogThreshold = Configuration.getInt(PropertyKey.MASTER_STATE_LOCK_ERROR_THRESHOLD); /** This is the deadline for forcing the lock. */ private long mForcedDurationMs; @@ -143,7 +154,7 @@ public LockResource lockShared() throws InterruptedException { final int readLockCount = mStateLock.getReadLockCount(); if (readLockCount > READ_LOCK_COUNT_HIGH) { SAMPLING_LOG.info("Read Lock Count Too High: {} {}", readLockCount, - mSharedWaitersAndHolders); + mSharedLockHolders); } } @@ -158,12 +169,36 @@ public LockResource lockShared() throws InterruptedException { } // Register thread for interrupt cycle. mSharedWaitersAndHolders.add(Thread.currentThread()); - // Grab the lock interruptibly. - mStateLock.readLock().lockInterruptibly(); + String threadName = Thread.currentThread().getName(); + mSharedLockHolders.computeIfAbsent(threadName, k -> new LongAdder()).increment(); + if (mSharedLockHolders.get(threadName).longValue() > mLogThreshold) { + Exception e = new Exception("Thread recursion is deeper than " + mLogThreshold); + LOG.warn("Current thread is {}. 
All state lock holders are {}", + threadName, mSharedLockHolders, e); + } + try { + // Grab the lock interruptibly. + mStateLock.readLock().lockInterruptibly(); + } catch (Error e) { + // An Error is thrown when the lock is acquired 65536 times, log the jstack before exiting + LOG.error("Logging all thread stacks before exiting", e); + ThreadUtils.logAllThreads(); + throw e; + } // Return the resource. // Register an action to remove the thread from holders registry before releasing the lock. return new LockResource(mStateLock.readLock(), false, false, () -> { - mSharedWaitersAndHolders.remove(Thread.currentThread()); + // This is invoked in the same thread at the end of try-with-resource + Thread removedFrom = Thread.currentThread(); + mSharedLockHolders.computeIfPresent(removedFrom.getName(), (k, v) -> { + mSharedWaitersAndHolders.remove(Thread.currentThread()); + if (v.longValue() <= 1L) { + return null; + } else { + v.decrement(); + return v; + } + }); }); } @@ -233,9 +268,8 @@ public LockResource lockExclusive(StateLockOptions lockOptions, activateInterruptCycle(); // Force the lock. 
LOG.info("Thread-{} forcing the lock with {} waiters/holders: {}", - ThreadUtils.getCurrentThreadIdentifier(), mSharedWaitersAndHolders.size(), - mSharedWaitersAndHolders.stream().map((th) -> Long.toString(th.getId())) - .collect(Collectors.joining(","))); + ThreadUtils.getCurrentThreadIdentifier(), mSharedLockHolders.size(), + mSharedLockHolders); try { if (beforeAttempt != null) { beforeAttempt.run(); @@ -262,13 +296,8 @@ public LockResource lockExclusive(StateLockOptions lockOptions, /** * @return the list of thread identifiers that are waiting and holding on the shared lock */ - public List getSharedWaitersAndHolders() { - List result = new ArrayList<>(); - - for (Thread waiterOrHolder : mSharedWaitersAndHolders) { - result.add(ThreadUtils.getThreadIdentifier(waiterOrHolder)); - } - return result; + public Collection getSharedWaitersAndHolders() { + return Collections.unmodifiableSet(mSharedLockHolders.keySet()); } /** diff --git a/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java b/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java index 50fa62984b41..0494fa50f0a1 100644 --- a/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java +++ b/core/server/common/src/test/java/alluxio/master/StateLockManagerTest.java @@ -18,7 +18,6 @@ import alluxio.conf.PropertyKey; import alluxio.resource.LockResource; import alluxio.util.CommonUtils; -import alluxio.util.ThreadUtils; import com.google.common.util.concurrent.SettableFuture; import org.junit.Assert; @@ -26,7 +25,7 @@ import org.junit.Test; import org.junit.rules.ExpectedException; -import java.util.List; +import java.util.Collection; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; import java.util.concurrent.locks.Lock; @@ -140,6 +139,7 @@ public void testExclusiveOnlyMode() throws Throwable { } @Test + // TODO(jiacheng): run this test before committing public void testGetStateLockSharedWaitersAndHolders() 
throws Throwable { final StateLockManager stateLockManager = new StateLockManager(); @@ -149,10 +149,11 @@ public void testGetStateLockSharedWaitersAndHolders() throws Throwable { StateLockingThread sharedHolderThread = new StateLockingThread(stateLockManager, false); sharedHolderThread.start(); sharedHolderThread.waitUntilStateLockAcquired(); - final List sharedWaitersAndHolders = stateLockManager.getSharedWaitersAndHolders(); + final Collection sharedWaitersAndHolders = + stateLockManager.getSharedWaitersAndHolders(); assertEquals(i, sharedWaitersAndHolders.size()); assertTrue(sharedWaitersAndHolders.contains( - ThreadUtils.getThreadIdentifier(sharedHolderThread))); + sharedHolderThread.getName())); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 12f0a3d6f64a..d0c58547503c 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -5375,7 +5375,7 @@ public String getRootInodeOwner() { } @Override - public List getStateLockSharedWaitersAndHolders() { + public Collection getStateLockSharedWaitersAndHolders() { return mMasterContext.getStateLockManager().getSharedWaitersAndHolders(); } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java index a270cea637e2..b9d474b0e38a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java @@ -627,7 +627,7 @@ void activeSyncMetadata(AlluxioURI path, Collection changedFiles, /** * @return the list of thread identifiers that are waiting and holding the state lock */ - List getStateLockSharedWaitersAndHolders(); + Collection 
getStateLockSharedWaitersAndHolders(); /** * Mark a path as needed synchronization with the UFS, when this path or any diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index adb6b3fe1229..c1705bb25878 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -104,6 +104,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -471,9 +472,9 @@ public void setAcl(SetAclPRequest request, StreamObserver respo @Override public void getStateLockHolders(GetStateLockHoldersPRequest request, - StreamObserver responseObserver) { + StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - final List holders = mFileSystemMaster.getStateLockSharedWaitersAndHolders(); + final Collection holders = mFileSystemMaster.getStateLockSharedWaitersAndHolders(); return GetStateLockHoldersPResponse.newBuilder().addAllThreads(holders).build(); }, "getStateLockHolders", "request=%s", responseObserver, request); } From 4af81fa9140a713cb2db447eeaf43b538f1a569b Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Fri, 3 Mar 2023 02:25:14 -0800 Subject: [PATCH 159/334] [DOCFIX] Add docs for fs loadMetadata cmd pr-link: Alluxio/alluxio#16999 change-id: cid-0cd9a428e225a6e1a56a17d4f5bca2e37e643fae --- docs/en/operation/User-CLI.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index fb0572ce205c..103b23b78a67 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -1149,6 +1149,24 @@ even if the data is already available on a remote worker. 
$ ./bin/alluxio fs load --local ``` +### loadMetadata + +The `loadMetadata` command loads metadata about a path in the UFS to Alluxio. +No data will be transferred. +This command is a client-side optimization without storing all returned `ls` results, preventing OOM for massive amount of small files. +This is useful when data has been added to the UFS outside of Alluxio and users are expected to reference the new data. +This command is more efficient than using the `ls` command since it does not store any directory or file information to be returned. + +Options: +* `-R` option recursively loads metadata in subdirectories +* `-F` option updates the metadata of the existing file forcibly + +For example, `loadMetadata` can be used to load metadata for a path in the UFS. +The -F option will force the loading of metadata even if there are existing metadata entries for the path. +```console +$ ./bin/alluxio fs loadMetadata -R -F +``` + ### location The `location` command returns the addresses of all the Alluxio workers which contain blocks From eee4bd36af32c65217297644c76d73aa4dfbc54c Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Sat, 4 Mar 2023 04:42:36 +0800 Subject: [PATCH 160/334] Fix jsoup CVE-2021-37714 ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17001 change-id: cid-6a69a3f8466e5fc85371049a8df7b1a5092765bc --- underfs/web/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/underfs/web/pom.xml b/underfs/web/pom.xml index a622265beb7a..77c829fc1fe9 100755 --- a/underfs/web/pom.xml +++ b/underfs/web/pom.xml @@ -40,7 +40,7 @@ org.jsoup jsoup - 1.8.3 + 1.14.2 From 5436fef19f1ff341ed26d52c211aae793c4bf324 Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Sat, 4 Mar 2023 04:46:26 +0800 Subject: [PATCH 161/334] Update operator for README.md ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16994 change-id: cid-8c6354de432645e90aaf89e48d20a4ec7ae91de7 --- .../kubernetes/operator/alluxio/README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/integration/kubernetes/operator/alluxio/README.md b/integration/kubernetes/operator/alluxio/README.md index 21164fcfc088..51b2dc8e137b 100644 --- a/integration/kubernetes/operator/alluxio/README.md +++ b/integration/kubernetes/operator/alluxio/README.md @@ -11,14 +11,15 @@ git clone https://github.com/Alluxio/alluxio.git 2. Copy to the Gopath ``` -mkdir -p /go/src/github.com/Alluxio/ -cp -rf /tmp/alluxio/integration/kubernetes/operator/alluxio /go/src/github.com/Alluxio/ +cd alluxio +mkdir -p $GOPATH/src/github.com/Alluxio/ +cp -rf ./integration/kubernetes/operator/alluxio $GOPATH/src/github.com/Alluxio/ ``` 3. 
Build the binary ``` -cd /go/src/github.com/Alluxio/ +cd $GOPATH/src/github.com/Alluxio/ make ``` @@ -62,8 +63,8 @@ kubectl create ns alluxio-system 3. Patch the MutatingWebhookConfiguration by set caBundle with correct value from Kubernetes cluster: ``` -cat /go/src/github.com/Alluxio/alluxio/deploy/webhook/manifests.yaml | \ - /go/src/github.com/Alluxio/alluxio/cmd/webhook/webhook-patch-ca-bundle.sh > \ +cat $GOPATH/src/github.com/Alluxio/alluxio/deploy/webhook/manifests.yaml | \ + $GOPATH/src/github.com/Alluxio/alluxio/cmd/webhook/webhook-patch-ca-bundle.sh > \ /tmp/mutatingwebhook-ca-bundle.yaml ``` @@ -71,10 +72,10 @@ cat /go/src/github.com/Alluxio/alluxio/deploy/webhook/manifests.yaml | \ ``` #kubectl create ns alluxio-system -kubectl create -f /go/src/github.com/Alluxio/alluxio/deploy/role-binding.yaml -kubectl apply -f /go/src/github.com/Alluxio/alluxio/config/webhook/webhook.yaml -kubectl apply -f /go/src/github.com/Alluxio/alluxio/config/webhook/service.yaml -kubectl apply -f /go/src/github.com/Alluxio/alluxio/config/manager/manager.yaml +kubectl create -f $GOPATH/src/github.com/Alluxio/alluxio/deploy/role-binding.yaml +kubectl apply -f $GOPATH/src/github.com/Alluxio/alluxio/config/webhook/webhook.yaml +kubectl apply -f $GOPATH/src/github.com/Alluxio/alluxio/config/webhook/service.yaml +kubectl apply -f $GOPATH/src/github.com/Alluxio/alluxio/config/manager/manager.yaml kubectl apply -f /tmp/mutatingwebhook-ca-bundle.yaml kubectl get mutatingwebhookconfigurations ``` From 82700c07a0fba9b71d804171d1bfdb440d36334f Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Mon, 6 Mar 2023 10:57:58 +0800 Subject: [PATCH 162/334] [DOCFIX] Fix some errors occurred when formatting alluxio ### What changes are proposed in this pull request? Some documentation has been modified to suggest better use of alluxio. ### Why are the changes needed? If JAVA_HOME is not set properly, you will encounter some errors when formatting. 
Details: https://github.com/Alluxio/alluxio/issues/16949 ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16950 change-id: cid-f26f2b78cc67a6cd771913920b4b18ad170a2987 --- docs/cn/overview/Getting-Started.md | 12 ++++++++++++ docs/en/overview/Getting-Started.md | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/docs/cn/overview/Getting-Started.md b/docs/cn/overview/Getting-Started.md index f3f18244e27d..fa195af59897 100644 --- a/docs/cn/overview/Getting-Started.md +++ b/docs/cn/overview/Getting-Started.md @@ -48,6 +48,18 @@ $ cd alluxio-{{site.ALLUXIO_VERSION_STRING}} ## 配置 Alluxio +在`${ALLUXIO_HOME}/conf`目录下,根据模板文件创建`conf/alluxio-env.sh`配置文件。 + +```console +$ cp conf/alluxio-env.sh.template conf/alluxio-env.sh +``` + +在`conf/alluxio-env.sh`文件中为 `JAVA_HOME` 增加配置.例如: + +```console +$ echo "JAVA_HOME=/path/to/java/home" >> conf/alluxio-env.sh +``` + 在`${ALLUXIO_HOME}/conf`目录下,根据模板文件创建`conf/alluxio-site.properties`配置文件。 ```console diff --git a/docs/en/overview/Getting-Started.md b/docs/en/overview/Getting-Started.md index aa991c793777..44bd0ef5abf4 100644 --- a/docs/en/overview/Getting-Started.md +++ b/docs/en/overview/Getting-Started.md @@ -50,6 +50,19 @@ to as `${ALLUXIO_HOME}`. ## Configuring Alluxio +In the `${ALLUXIO_HOME}/conf` directory, create the `conf/alluxio-env.sh` configuration +file by copying the template file. + +```console +$ cp conf/alluxio-env.sh.template conf/alluxio-env.sh +``` + +In `conf/alluxio-env.sh`, adds configuration for `JAVA_HOME`. For example: + +```console +$ echo "JAVA_HOME=/path/to/java/home" >> conf/alluxio-env.sh +``` + In the `${ALLUXIO_HOME}/conf` directory, create the `conf/alluxio-site.properties` configuration file by copying the template file. 
From 5bd5cb214b8a43ef7e54b6dc9a4c3e49e8730735 Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Mon, 6 Mar 2023 10:59:55 +0800 Subject: [PATCH 163/334] [DOCFIX] Add OpenSSF Scorecard for README.md ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17003 change-id: cid-7d67d49738af92378015c8c8e9508ed697f40f60 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index aeb94026ac50..7d9db7169b92 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Release](https://img.shields.io/github/release/alluxio/alluxio/all.svg)](https://www.alluxio.io/download) [![Docker Pulls](https://img.shields.io/docker/pulls/alluxio/alluxio.svg)](https://hub.docker.com/r/alluxio/alluxio) [![Documentation](https://img.shields.io/badge/docs-reference-blue.svg)](https://www.alluxio.io/docs) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/Alluxio/alluxio/badge)](https://api.securityscorecards.dev/projects/github.com/Alluxio/alluxio) [![Twitter Follow](https://img.shields.io/twitter/follow/alluxio.svg?label=Follow&style=social)](https://twitter.com/intent/follow?screen_name=alluxio) [![License](https://img.shields.io/github/license/alluxio/alluxio.svg)](https://github.com/Alluxio/alluxio/blob/master/LICENSE) From 5cc79dada32dbe6abd9907c6ad41a882beae8177 Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Mon, 6 Mar 2023 10:59:59 +0800 Subject: [PATCH 164/334] Update metrics/README.md ### What changes are proposed in this pull request? 
Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16992 change-id: cid-6a943a5c65150acf2e39d79ad1579bd50a31da8a --- integration/metrics/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/integration/metrics/README.md b/integration/metrics/README.md index 19a8391a1bbf..c736e2395bfd 100644 --- a/integration/metrics/README.md +++ b/integration/metrics/README.md @@ -38,3 +38,8 @@ bin/alluxio copyDir conf ``` 6. Restart alluxio master and workers 7. Point your browser to `MASTER_IP:16686` for tracing and `MASTER_IP:9090` for metrics +8. Stop alluxio master and workers +``` +docker-compose -f docker-compose-master.yaml down +docker-compose -f docker-compose-worker.yaml down +``` \ No newline at end of file From 0ca2b53aa4e06ba4f274949f36c561aaa9b9e264 Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Mon, 6 Mar 2023 15:00:23 +0800 Subject: [PATCH 165/334] Reduce time cost of checking bucket path in S3 API ### What changes are proposed in this pull request? - I create `BUCKET_PATH_CACHE` as a static object to `S3RestHandlerService`. - I create property key `PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS` to determine how long each entry should be automatically removed from the cache. - I create function `checkPathIsAlluxioDirectory(FileSystem, String,@Nullable S3AuditContext, Cache bucketPathCache)` ,which plays the same role as `checkPathIsAlluxioDirectory(FileSystem, String,@Nullable S3AuditContext)` but more efficient. 
- I change S3RestHandlerService.java to call `checkPathIsAlluxioDirectory` with argument `BUCKET_PATH_CACHE`. - I add boolean value `checkS3BucketPath` as a field of `CreateFilePOptions` and `CreateDirectoryPOptions`. This flag is used in `FileSystemMasterClientServiceHandler` to check the bucket path existent if `checkS3BucketPath` is `TRUE`. ### Why are the changes needed? 1. `BUCKET_PATH_CACHE` highly improves the efficiency while creating objects. For example: Set up an Alluxio cluster with 1 master(aws ec2 m5.2xlarge) and 2 workers(aws ec2 m5.4xlarge); benchmark S3 API with warp cmd. ``` WARP_ACCESS_KEY=minioadmin WARP_SECRET_KEY=minioadmin ./warp put --concurrent=80 --obj.size=64KiB --host=cluster0120-masters-1 --disable-multipart --insecure --md5 --duration=3m ``` Benchmark result is like: |PUT operation|before|after| |-|-|-| | Average Throughput |48.49 MiB/s, 775.82 obj/s|59.66 MiB/s, 954.57 obj/s| |Time percentage: checkPathIsAlluxioDirectory()/createObjectOrUploadPart()|7.70%|0.50%| |Time percentage: checkPathIsAlluxioDirectory()/total_time|3.31%|0.21%| 2. Users can modify `alluxio.proxy.s3.bucketpathcache.timeout` in `conf/alluxio-site.properties` to change the time period of cache expiration. ### Does this PR introduce any user facing changes? Users can modify `alluxio.proxy.s3.bucketpathcache.timeout` in `conf/alluxio-site.properties` to change the time period of cache expiration. The default value is `1min`. Users can set it `0min` to close the function. |Property Name|Default|Description| |-|-|-| |alluxio.proxy.s3.bucketpathcache.timeout|1min| Expire bucket path statistics in cache for this time period. Set `0min` to disable the cache.| > Note: `create file` and `create bucket` operations are always accurate , but other operations bring incosistent results because of the cache to bucket path. All in all, alluxio makes sure that every modification is cosistent and correct. 
pr-link: Alluxio/alluxio#16806 change-id: cid-fdaff4b5a6fdd3eca834cfaf8caea3fe38618005 --- .../main/java/alluxio/conf/PropertyKey.java | 12 ++++ .../alluxio/exception/ExceptionMessage.java | 1 + .../main/java/alluxio/util/io/PathUtils.java | 23 +++++++ .../FileSystemMasterClientServiceHandler.java | 23 +++++++ .../proxy/s3/S3RestServiceHandler.java | 64 +++++++++++++------ .../java/alluxio/proxy/s3/S3RestUtils.java | 31 ++++++++- .../main/proto/grpc/file_system_master.proto | 2 + core/transport/src/main/proto/proto.lock | 10 +++ .../client/rest/S3ClientRestApiTest.java | 50 +++++++++++++-- 9 files changed, 189 insertions(+), 27 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 8f3474d2c074..a3fad9597a70 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5359,6 +5359,15 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = + durationBuilder(Name.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS) + .setAlias("alluxio.proxy.s3.bucketpathcache.timeout.ms") + .setDefaultValue("1min") + .setDescription("Expire bucket path statistics in cache for this time period. 
" + + "Set 0min to disable the cache.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) + .setScope(Scope.NONE) + .build(); public static final PropertyKey PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = intBuilder(Name.PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB) .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) @@ -5374,6 +5383,7 @@ public String toString() { + "Set value less than or equal to 0 to disable rate limits.") .setDefaultValue(0) .setScope(Scope.SERVER) + .build(); // @@ -8511,6 +8521,8 @@ public static final class Name { public static final String PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = "alluxio.proxy.s3.v2.async.processing.enabled"; public static final String S3_UPLOADS_ID_XATTR_KEY = "s3_uploads_mulitpartupload_id"; + public static final String PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = + "alluxio.proxy.s3.bucketpathcache.timeout"; public static final String PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB = "alluxio.proxy.s3.global.read.rate.limit.mb"; public static final String PROXY_S3_SINGLE_CONNECTION_READ_RATE_LIMIT_MB = diff --git a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java index 8723ed1b0b19..ec304eb2e8f2 100644 --- a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java +++ b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java @@ -27,6 +27,7 @@ public enum ExceptionMessage { // general PATH_DOES_NOT_EXIST("Path \"{0}\" does not exist."), + BUCKET_DOES_NOT_EXIST("Bucket \"{0}\" does not exist."), PATH_DOES_NOT_EXIST_PARTIAL_LISTING("Path \"{0}\" was removed during listing."), INODE_NOT_FOUND_PARTIAL_LISTING("\"{0}\" Inode was not found during partial listing. 
It was " + "likely removed across listing calls."), diff --git a/core/common/src/main/java/alluxio/util/io/PathUtils.java b/core/common/src/main/java/alluxio/util/io/PathUtils.java index c95240b063f1..cb731a9d7270 100644 --- a/core/common/src/main/java/alluxio/util/io/PathUtils.java +++ b/core/common/src/main/java/alluxio/util/io/PathUtils.java @@ -191,6 +191,29 @@ public static String getParentCleaned(String cleanedPath) throws InvalidPathExce return parent; } + /** + * Gets the first level directory of the path. + * For example, + * + *
+   * {@code
+   * getFirstLevelDirectory("/a/xx/").equals("/a");
+   * getFirstLevelDirectory("/a/").equals("/a");
+   * }
+   * 
+ * + * @param path the path + * @return the first level directory of the path; + * @throws InvalidPathException if the path is the root or invalid + */ + public static String getFirstLevelDirectory(String path) throws InvalidPathException { + String[] paths = getPathComponents(path); + if (paths.length < 2) { + throw new InvalidPathException(path + " has no first level directory"); + } + return AlluxioURI.SEPARATOR + paths[1]; + } + /** * Join two path elements for ufs, separated by {@link AlluxioURI#SEPARATOR}. * diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index c1705bb25878..71a6768d0c06 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -16,6 +16,8 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileDoesNotExistException; import alluxio.grpc.CheckAccessPRequest; import alluxio.grpc.CheckAccessPResponse; import alluxio.grpc.CheckConsistencyPOptions; @@ -30,6 +32,7 @@ import alluxio.grpc.CreateFilePResponse; import alluxio.grpc.DeletePRequest; import alluxio.grpc.DeletePResponse; +import alluxio.grpc.ExistsPOptions; import alluxio.grpc.ExistsPRequest; import alluxio.grpc.ExistsPResponse; import alluxio.grpc.FileSystemMasterClientServiceGrpc; @@ -95,6 +98,7 @@ import alluxio.master.file.contexts.SetAttributeContext; import alluxio.recorder.Recorder; import alluxio.underfs.UfsMode; +import alluxio.util.io.PathUtils; import alluxio.wire.MountPointInfo; import alluxio.wire.SyncPointInfo; @@ -103,6 +107,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import 
java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -183,11 +188,26 @@ public void completeFile(CompleteFilePRequest request, }, "CompleteFile", "request=%s", responseObserver, request); } + private void checkBucketPathExists(String path) + throws AlluxioException, IOException { + + String bucketPath = PathUtils.getFirstLevelDirectory(path); + boolean exists = mFileSystemMaster.exists(getAlluxioURI(bucketPath), + ExistsContext.create(ExistsPOptions.getDefaultInstance().toBuilder())); + if (!exists) { + throw new FileDoesNotExistException( + ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(bucketPath)); + } + } + @Override public void createDirectory(CreateDirectoryPRequest request, StreamObserver responseObserver) { CreateDirectoryPOptions options = request.getOptions(); RpcUtils.call(LOG, () -> { + if (request.getOptions().getCheckS3BucketPath()) { + checkBucketPathExists(request.getPath()); + } AlluxioURI pathUri = getAlluxioURI(request.getPath()); mFileSystemMaster.createDirectory(pathUri, CreateDirectoryContext.create(options.toBuilder()) .withTracker(new GrpcCallTracker(responseObserver))); @@ -199,6 +219,9 @@ public void createDirectory(CreateDirectoryPRequest request, public void createFile(CreateFilePRequest request, StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { + if (request.getOptions().getCheckS3BucketPath()) { + checkBucketPathExists(request.getPath()); + } AlluxioURI pathUri = getAlluxioURI(request.getPath()); return CreateFilePResponse.newBuilder() .setFileInfo(GrpcUtils.toProto(mFileSystemMaster.createFile(pathUri, diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 94627fac01be..fb86ee921833 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -42,6 +42,8 @@ import 
com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.base.Preconditions; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import com.google.common.io.BaseEncoding; import com.google.common.io.ByteStreams; import com.google.common.net.InetAddresses; @@ -67,6 +69,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -107,7 +110,13 @@ public final class S3RestServiceHandler { public static final String BUCKET_PARAM = "{bucket}/"; /* Object is after bucket in the URL path */ public static final String OBJECT_PARAM = "{bucket}/{object:.+}"; - + public static final int BUCKET_PATH_CACHE_SIZE = 65536; + private static final Cache BUCKET_PATH_CACHE = CacheBuilder.newBuilder() + .maximumSize(BUCKET_PATH_CACHE_SIZE) + .expireAfterWrite( + Configuration.global().getMs(PropertyKey.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS), + TimeUnit.MILLISECONDS) + .build(); private final FileSystem mMetaFS; private final InstancedConfiguration mSConf; @@ -216,6 +225,7 @@ public Response listAllMyBuckets() { // debatable (?) potentially breaks backcompat(?) 
.filter(URIStatus::isFolder) .collect(Collectors.toList()); + buckets.forEach((uri) -> BUCKET_PATH_CACHE.put(new AlluxioURI(uri.getPath()), true)); return new ListAllMyBucketsResult(buckets); } }); @@ -236,7 +246,8 @@ public Response headBucket( final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mMetaFS); try (S3AuditContext auditContext = createAuditContext("headBucket", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); } return Response.ok().build(); }); @@ -302,7 +313,7 @@ public Response getBucket(@PathParam("bucket") final String bucket, final FileSystem userFs = S3RestUtils.createFileSystemForUser(user, mMetaFS); try (S3AuditContext auditContext = createAuditContext("listObjects", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, BUCKET_PATH_CACHE); if (tagging != null) { // GetBucketTagging AlluxioURI uri = new AlluxioURI(path); try { @@ -494,7 +505,8 @@ public Response createBucket(@PathParam("bucket") final String bucket, try (S3AuditContext auditContext = createAuditContext("createBucket", user, bucket, null)) { if (tagging != null) { // PutBucketTagging - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); try { TaggingData tagData = new XmlMapper().readerFor(TaggingData.class) .readValue(is); @@ -576,6 +588,7 @@ public Response createBucket(@PathParam("bucket") final String bucket, } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } + BUCKET_PATH_CACHE.put(new AlluxioURI(bucketPath), true); return Response.Status.OK; } }); @@ -607,7 +620,8 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, try 
(S3AuditContext auditContext = createAuditContext("deleteBucket", user, bucket, null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); if (tagging != null) { // DeleteBucketTagging LOG.debug("DeleteBucketTagging bucket={}", bucketPath); @@ -626,15 +640,16 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, } // Delete the bucket. - DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration - .get(PropertyKey.PROXY_S3_DELETE_TYPE) - .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly( + Configuration.get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) .build(); try { userFs.delete(new AlluxioURI(bucketPath), options); } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } + BUCKET_PATH_CACHE.put(new AlluxioURI(bucketPath), false); return Response.Status.NO_CONTENT; } }); @@ -716,7 +731,8 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); try (S3AuditContext auditContext = createAuditContext("createObject", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { @@ -732,6 +748,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) .setAllowExists(true) + .setCheckS3BucketPath(true) .build(); userFs.createDirectory(new AlluxioURI(objectPath), dirOptions); } catch (FileAlreadyExistsException e) { @@ -825,6 +842,7 @@ 
public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setOtherBits(Bits.NONE).build()) .setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setCheckS3BucketPath(true) .setOverwrite(true) .build(); @@ -896,6 +914,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) + .setCheckS3BucketPath(true) .setOverwrite(true); // Handle metadata directive if (metadataDirective == S3Constants.Directive.REPLACE @@ -1006,7 +1025,8 @@ public Response initiateMultipartUpload( TaggingData tagData = null; try (S3AuditContext auditContext = createAuditContext("initiateMultipartUpload", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); if (taggingHeader != null) { // Parse the tagging header if it exists for PutObject try { tagData = S3RestUtils.deserializeTaggingHeader(taggingHeader, mMaxHeaderMetadataSize); @@ -1043,7 +1063,8 @@ public Response initiateMultipartUpload( .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) - .setWriteType(S3RestUtils.getS3WriteType()).build()); + .setWriteType(S3RestUtils.getS3WriteType()) + .build()); // Create the Alluxio multipart upload metadata file if (contentType != null) { @@ -1190,7 +1211,8 @@ private Response listParts(final String bucket, String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); try (S3AuditContext auditContext = createAuditContext("listParts", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); AlluxioURI tmpDir = new AlluxioURI( 
S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); @@ -1198,8 +1220,8 @@ private Response listParts(final String bucket, S3RestUtils.checkStatusesForUploadId(mMetaFS, userFs, tmpDir, uploadId); } catch (Exception e) { throw S3RestUtils.toObjectS3Exception((e instanceof FileDoesNotExistException) - ? new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, - object, auditContext); + ? new S3Exception(object, S3ErrorCode.NO_SUCH_UPLOAD) : e, + object, auditContext); } try { @@ -1304,7 +1326,8 @@ private Response getObjectTags(final String bucket, AlluxioURI uri = new AlluxioURI(objectPath); try (S3AuditContext auditContext = createAuditContext("getObjectTags", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + BUCKET_PATH_CACHE); try { TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); LOG.debug("GetObjectTagging tagData={}", tagData); @@ -1361,7 +1384,7 @@ private void abortMultipartUpload(String bucket, String object, new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject(bucketPath, object, uploadId)); try (S3AuditContext auditContext = createAuditContext("abortMultipartUpload", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { S3RestUtils.checkStatusesForUploadId(mMetaFS, userFs, multipartTemporaryDir, uploadId); } catch (Exception e) { @@ -1391,12 +1414,13 @@ private void deleteObject(String bucket, String object) throws S3Exception { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); // Delete the object. 
String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; - DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration - .get(PropertyKey.PROXY_S3_DELETE_TYPE).equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) + DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly( + Configuration.get(PropertyKey.PROXY_S3_DELETE_TYPE) + .equals(Constants.S3_DELETE_IN_ALLUXIO_ONLY)) .build(); try (S3AuditContext auditContext = createAuditContext("deleteObject", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { userFs.delete(new AlluxioURI(objectPath), options); } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { @@ -1422,7 +1446,7 @@ private void deleteObjectTags(String bucket, String object) .build(); try (S3AuditContext auditContext = createAuditContext("deleteObjectTags", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, BUCKET_PATH_CACHE); try { userFs.setAttribute(new AlluxioURI(objectPath), attrPOptions); } catch (Exception e) { diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index a7963319c820..19c6a07acb55 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -21,6 +21,7 @@ import alluxio.exception.AccessControlException; import alluxio.exception.AlluxioException; import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.ExceptionMessage; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; @@ -39,6 +40,7 @@ import 
com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.annotations.VisibleForTesting; +import com.google.common.cache.Cache; import com.google.common.primitives.Longs; import com.google.common.util.concurrent.RateLimiter; import com.google.protobuf.ByteString; @@ -59,6 +61,7 @@ import java.util.Map; import java.util.Optional; import java.util.TreeMap; +import java.util.regex.Pattern; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.security.auth.Subject; @@ -246,6 +249,10 @@ public static S3Exception toObjectS3Exception(Exception exception, String resour } catch (DirectoryNotEmptyException e) { return new S3Exception(e, resource, S3ErrorCode.PRECONDITION_FAILED); } catch (FileDoesNotExistException e) { + if (Pattern.matches(ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(".*"), + e.getMessage())) { + return new S3Exception(e, resource, S3ErrorCode.NO_SUCH_BUCKET); + } return new S3Exception(e, resource, S3ErrorCode.NO_SUCH_KEY); } catch (AccessControlException e) { return new S3Exception(e, resource, S3ErrorCode.ACCESS_DENIED_ERROR); @@ -284,8 +291,8 @@ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, try { URIStatus status = fs.getStatus(new AlluxioURI(bucketPath)); if (!status.isFolder()) { - throw new InvalidPathException("Bucket " + bucketPath - + " is not a valid Alluxio directory."); + throw new FileDoesNotExistException( + ExceptionMessage.BUCKET_DOES_NOT_EXIST.getMessage(bucketPath)); } } catch (Exception e) { if (auditContext != null) { @@ -295,6 +302,26 @@ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, } } + /** + * Check if a path in alluxio is a directory. 
+ * + * @param fs instance of {@link FileSystem} + * @param bucketPath bucket complete path + * @param auditContext the audit context for exception + * @param bucketPathCache cache the bucket path for a certain time period + */ + public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, + @Nullable S3AuditContext auditContext, + Cache bucketPathCache) + throws S3Exception { + AlluxioURI uri = new AlluxioURI(bucketPath); + if (Boolean.TRUE.equals(bucketPathCache.getIfPresent(uri))) { + return; + } + checkPathIsAlluxioDirectory(fs, bucketPath, auditContext); + bucketPathCache.put(uri, true); + } + /** * Fetches and returns the corresponding {@link URIStatus} for both * the multipart upload temp directory and the Alluxio S3 metadata file. diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 558ae24f6217..3f8179352670 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -115,6 +115,7 @@ message CreateDirectoryPOptions { optional FileSystemMasterCommonPOptions commonOptions = 5; map xattr = 6; optional XAttrPropagationStrategy xattrPropStrat = 7 [default = NEW_PATHS]; + optional bool checkS3BucketPath = 8; } message CreateDirectoryPRequest { /** the path of the directory */ @@ -139,6 +140,7 @@ message CreateFilePOptions { map xattr = 11; optional XAttrPropagationStrategy xattrPropStrat = 12 [default = NEW_PATHS]; optional bool overwrite = 13; + optional bool checkS3BucketPath = 14; } message CreateFilePRequest { /** the path of the file */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 62a094bc02bf..879b1f1a9af5 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2348,6 +2348,11 @@ "value": "NEW_PATHS" } ] + }, + { + "id": 8, + "name": "checkS3BucketPath", + "type": 
"bool" } ], "maps": [ @@ -2454,6 +2459,11 @@ "id": 13, "name": "overwrite", "type": "bool" + }, + { + "id": 14, + "name": "checkS3BucketPath", + "type": "bool" } ], "maps": [ diff --git a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java index e9a2adcfef96..3c7a54a32e23 100644 --- a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java +++ b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java @@ -294,10 +294,9 @@ public void listBucketUnauthorized() throws Exception { @Test public void listNonExistentBucket() throws Exception { - String bucketName = "bucket"; - //empty parameters - List statuses = mFileSystem.listStatus(new AlluxioURI("/"), - ListStatusPOptions.newBuilder().setRecursive(true).build()); +// the bucket name should never be used in other unit tests +// to ensure the bucket path cache doesn't have this bucket name + String bucketName = "non_existent_bucket"; // Verify 404 HTTP status & NoSuchBucket S3 error code HttpURLConnection connection = new TestCase(mHostname, mPort, mBaseUri, @@ -993,7 +992,7 @@ public void testGetDeletedObject() throws Exception { FileUtils.deleteQuietly( new File(sResource.get().getAlluxioHome() + "/underFSStorage/" + fullObjectKey)); - // Verify the object is exist in the alluxio. + // Verify the object is existent in the alluxio. 
List fileInfos = mFileSystemMaster.listStatus(bucketURI, ListStatusContext.defaults()); Assert.assertEquals(1, fileInfos.size()); @@ -1008,6 +1007,47 @@ public void testGetDeletedObject() throws Exception { Assert.assertEquals(S3ErrorCode.Name.NO_SUCH_KEY, response.getCode()); } + @Test + public void putObjectToDeletedBucket() throws Exception { + String object = CommonUtils.randomAlphaNumString(DATA_SIZE); + createBucketRestCall("bucket"); + // delete the bucket in alluxio and UFS, but the bucket remains in BUCKET_PATH_CACHE + mFileSystem.delete(new AlluxioURI("/bucket")); + // put object to non-existent bucket + HttpURLConnection connection = new TestCase(mHostname, mPort, mBaseUri, + "bucket/object", NO_PARAMS, HttpMethod.PUT, + getDefaultOptionsWithAuth() + .setBody(object.getBytes()) + .setContentType(TestCaseOptions.OCTET_STREAM_CONTENT_TYPE) + .setMD5(computeObjectChecksum(object.getBytes()))) + .execute(); + + Assert.assertEquals(404, connection.getResponseCode()); + S3Error response = + new XmlMapper().readerFor(S3Error.class).readValue(connection.getErrorStream()); + Assert.assertEquals(S3ErrorCode.Name.NO_SUCH_BUCKET, response.getCode()); + } + + @Test + public void putDirectoryToDeletedBucket() throws Exception { + createBucketRestCall("bucket"); + // delete the bucket in alluxio and UFS, but the bucket remains in BUCKET_PATH_CACHE + mFileSystem.delete(new AlluxioURI("/bucket")); + // put directory to non-existent bucket + HttpURLConnection connection = new TestCase(mHostname, mPort, mBaseUri, + "bucket/directory/", NO_PARAMS, HttpMethod.PUT, + getDefaultOptionsWithAuth() + .setBody(new byte[] {}) + .setContentType(TestCaseOptions.OCTET_STREAM_CONTENT_TYPE) + .setMD5(computeObjectChecksum(new byte[] {}))) + .execute(); + + Assert.assertEquals(404, connection.getResponseCode()); + S3Error response = + new XmlMapper().readerFor(S3Error.class).readValue(connection.getErrorStream()); + Assert.assertEquals(S3ErrorCode.Name.NO_SUCH_BUCKET, 
response.getCode()); + } + @Test public void putDirectoryObject() throws Exception { final String bucketName = "directory-bucket"; From 6c53027e5ec3b92bd06b6f10bd040c6a6dd3ae19 Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Mon, 6 Mar 2023 15:39:27 +0800 Subject: [PATCH 166/334] Bump go from 1.13 to 1.18 ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#16995 change-id: cid-05244aa43d5db621fbc1bd8cf65ea00a4f5b7332 --- .../kubernetes/operator/alluxio/Dockerfile | 4 +- .../alluxio/examples/demo-1/docker/Dockerfile | 2 +- .../kubernetes/operator/alluxio/go.mod | 50 ++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/integration/kubernetes/operator/alluxio/Dockerfile b/integration/kubernetes/operator/alluxio/Dockerfile index c5b197fa175e..9b14d2028338 100644 --- a/integration/kubernetes/operator/alluxio/Dockerfile +++ b/integration/kubernetes/operator/alluxio/Dockerfile @@ -10,7 +10,7 @@ # # Build the manager binary -FROM golang:1.14.2 as builder +FROM golang:1.18-alpine:3.17 as builder WORKDIR /go/src/github.com/Alluxio/alluxio COPY . . 
@@ -21,7 +21,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=off go build -gcflags="-N RUN go get github.com/go-delve/delve/cmd/dlv -FROM alpine:3.10 +FROM alpine:3.17 RUN apk add --update curl tzdata iproute2 bash libc6-compat vim && \ rm -rf /var/cache/apk/* && \ cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \ diff --git a/integration/kubernetes/operator/alluxio/examples/demo-1/docker/Dockerfile b/integration/kubernetes/operator/alluxio/examples/demo-1/docker/Dockerfile index 26c5daeb2300..28e41ef7c05f 100644 --- a/integration/kubernetes/operator/alluxio/examples/demo-1/docker/Dockerfile +++ b/integration/kubernetes/operator/alluxio/examples/demo-1/docker/Dockerfile @@ -9,7 +9,7 @@ # See the NOTICE file distributed with this work for information regarding copyright ownership. # -FROM alpine:3.11 +FROM alpine:3.17 RUN apk add --update curl tzdata iproute2 bash libc6-compat vim rsync && \ rm -rf /var/cache/apk/* && \ diff --git a/integration/kubernetes/operator/alluxio/go.mod b/integration/kubernetes/operator/alluxio/go.mod index 024bc9b91e42..c8e4f2874310 100644 --- a/integration/kubernetes/operator/alluxio/go.mod +++ b/integration/kubernetes/operator/alluxio/go.mod @@ -1,6 +1,6 @@ module github.com/Alluxio/alluxio -go 1.13 +go 1.18 require ( github.com/docker/go-units v0.3.3 @@ -17,6 +17,54 @@ require ( sigs.k8s.io/controller-runtime v0.3.0 ) +require ( + cloud.google.com/go v0.34.0 // indirect + github.com/beorn7/perks v1.0.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96 // indirect + github.com/evanphx/json-patch v4.5.0+incompatible // indirect + github.com/go-logr/zapr v0.1.0 // indirect + github.com/gogo/protobuf v1.1.1 // indirect + github.com/golang/groupcache v0.0.0-20180513044358-24b0969c4cb7 // indirect + github.com/golang/protobuf v1.3.2 // indirect + github.com/google/go-cmp v0.3.0 // indirect + github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf 
// indirect + github.com/google/uuid v1.0.0 // indirect + github.com/googleapis/gnostic v0.3.1 // indirect + github.com/hashicorp/golang-lru v0.5.0 // indirect + github.com/hpcloud/tail v1.0.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/json-iterator/go v1.1.6 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/prometheus/client_golang v1.0.0 // indirect + github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 // indirect + github.com/prometheus/common v0.4.1 // indirect + github.com/prometheus/procfs v0.0.2 // indirect + github.com/spf13/pflag v1.0.2 // indirect + go.uber.org/atomic v1.3.2 // indirect + go.uber.org/multierr v1.1.0 // indirect + golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect + golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc // indirect + golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect + golang.org/x/sys v0.0.0-20190312061237-fead79001313 // indirect + golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db // indirect + golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect + golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 // indirect + gomodules.xyz/jsonpatch/v2 v2.0.1 // indirect + google.golang.org/appengine v1.5.0 // indirect + gopkg.in/fsnotify.v1 v1.4.7 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect + k8s.io/apiextensions-apiserver v0.0.0 // indirect + k8s.io/klog v0.3.3 // indirect + k8s.io/kube-openapi v0.0.0-20190228160746-b3a7cee44a30 // indirect + k8s.io/utils v0.0.0-20190506122338-8fab8cb257d5 // indirect + sigs.k8s.io/testing_frameworks v0.1.1 // indirect + sigs.k8s.io/yaml v1.1.0 // indirect +) + replace k8s.io/api => k8s.io/api v0.0.0-20190918195907-bd6ac527cfd2 replace 
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.0.0-20190918201827-3de75813f604 From 201672f64867ff4536f9826acaa2b03d995dd515 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 5 Mar 2023 23:56:56 -0800 Subject: [PATCH 167/334] Bump jsoup from 1.14.2 to 1.15.3 in /underfs/web Bumps [jsoup](https://github.com/jhy/jsoup) from 1.8.3 to 1.15.3.
Release notes

Sourced from jsoup's releases.

jsoup 1.15.3

jsoup 1.15.3 is out now, and includes a security fix for potential XSS attacks, along with other bug fixes and improvements, including more descriptive validation error messages.

Details:

jsoup 1.15.2 is out now with a bunch of improvements and bug fixes.

jsoup 1.15.1 is out now with a bunch of improvements and bug fixes.

jsoup 1.14.3

jsoup 1.14.3 is out now, adding native XPath selector support, improved \<template> support, and also includes a bunch of bug fixes, improvements, and performance enhancements.

See the release announcement for the full changelog.

jsoup 1.14.2

Caught by the fuzz! jsoup 1.14.2 is out now, and includes a set of parser bug fixes and improvements for handling rough HTML and XML, as identified by the Jazzer JVM fuzzer. This release also includes other fixes and improvements.

See the release announcement for the full changelog.

jsoup 1.14.1

jsoup 1.14.1 is out now, with simple request session management, increased parse robustness, and a ton of other improvements, speed-ups, and bug fixes.

See the full announcement for all the details on what's changed.

jsoup 1.13.1

See the release notes.

<dependency>
<!-- jsoup HTML parser library @ https://jsoup.org/ -->
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>

jsoup-1.12.2

No release notes provided.

Changelog

Sourced from jsoup's changelog.

jsoup changelog

Release 1.15.3 [2022-Aug-24]

  • Security: fixed an issue where the jsoup cleaner may incorrectly sanitize crafted XSS attempts if SafeList.preserveRelativeLinks is enabled. https://github.com/jhy/jsoup/security/advisories/GHSA-gp7f-rwcx-9369

  • Improvement: the Cleaner will preserve the source position of cleaned elements, if source tracking is enabled in the original parse.

  • Improvement: the error messages output from Validate are more descriptive. Exceptions are now ValidationExceptions (extending IllegalArgumentException). Stack traces do not include the Validate class, to make it simpler to see where the exception originated. Common validation errors including malformed URLs and empty selector results have more explicit error messages.

  • Bugfix: the DataUtil would incorrectly read from InputStreams that emitted reads less than the requested size. This lead to incorrect results when parsing from chunked server responses, for e.g. jhy/jsoup#1807

  • Build Improvement: added implementation version and related fields to the jar manifest. jhy/jsoup#1809

*** Release 1.15.2 [2022-Jul-04]

  • Improvement: added the ability to track the position (line, column, index) in the original input source from where a given node was parsed. Accessible via Node.sourceRange() and Element.endSourceRange(). jhy/jsoup#1790

  • Improvement: added Element.firstElementChild(), Element.lastElementChild(), Node.firstChild(), Node.lastChild(), as convenient accessors to those child nodes and elements.

  • Improvement: added Element.expectFirst(cssQuery), which is just like Element.selectFirst(), but instead of returning a null if there is no match, will throw an IllegalArgumentException. This is useful if you want to simply abort processing if an expected match is not found.

  • Improvement: when pretty-printing HTML, doctypes are emitted on a newline if there is a preceding comment. jhy/jsoup#1664

  • Improvement: when pretty-printing, trim the leading and trailing spaces of textnodes in block tags when possible, so that they are indented correctly. jhy/jsoup#1798

  • Improvement: in Element#selectXpath(), disable namespace awareness. This makes it possible to always select elements by their simple local name, regardless of whether an xmlns attribute was set. jhy/jsoup#1801

  • Bugfix: when using the readToByteBuffer method, such as in Connection.Response.body(), if the document has not already been parsed and must be read fully, and there is any maximum buffer size being applied, only the default internal buffer size is read. jhy/jsoup#1774

... (truncated)

Commits
  • c596417 [maven-release-plugin] prepare release jsoup-1.15.3
  • d2d9ac3 Changelog for URL cleaner improvement
  • 4ea768d Strip control characters from URLs when resolving absolute URLs
  • 985f1fe Include help link for malformed URLs
  • 6b67d05 Improved Validate error messages
  • 653da57 Normalized API doc link
  • 5ed84f6 Simplified the Test Server startup
  • c58112a Set the read size correctly when capped
  • fa13c80 Added jar manifest default implementation entries.
  • 5b19390 Bump maven-resources-plugin from 3.2.0 to 3.3.0 (#1814)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.jsoup:jsoup&package-manager=maven&previous-version=1.8.3&new-version=1.15.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#16138 change-id: cid-4ba9ae2074103e629d3ef12dcd3f0036f3199203 --- underfs/web/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 underfs/web/pom.xml diff --git a/underfs/web/pom.xml b/underfs/web/pom.xml old mode 100755 new mode 100644 index 77c829fc1fe9..77915309ac51 --- a/underfs/web/pom.xml +++ b/underfs/web/pom.xml @@ -40,7 +40,7 @@ org.jsoup jsoup - 1.14.2 + 1.15.3 From 7f2f15adc8e865abcb66020ceb7524475c8ad19c Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Mon, 6 Mar 2023 14:15:25 -0800 Subject: [PATCH 168/334] Enable v2 s3 proxy by default ### What changes are proposed in this pull request? Enable v2 s3 proxy by default. ### Why are the changes needed? ### Does this PR introduce any user facing changes? s3 api will go thru v2 proxy service now. pr-link: Alluxio/alluxio#16937 change-id: cid-fd406105d77cadcebc7acb29c1d250215985e1ab --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 6 +++--- .../proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index a3fad9597a70..dfba5626fe20 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5210,8 +5210,8 @@ public String toString() { .build(); public static final PropertyKey PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED = booleanBuilder(Name.PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED) - .setDefaultValue(true) - .setDescription("Whether or not to enable automatic cleanup of long-running " + .setDefaultValue(false) + .setDescription("Enable automatic cleanup of long-running " + "multipart uploads.") .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) @@ -5308,7 +5308,7 @@ public String toString() { .build(); public 
static final PropertyKey PROXY_S3_V2_VERSION_ENABLED = booleanBuilder(Name.PROXY_S3_V2_VERSION_ENABLED) - .setDefaultValue(false) + .setDefaultValue(true) .setDescription("(Experimental) V2, an optimized version of " + "Alluxio s3 proxy service.") .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 0c3a08e599c1..165f6fe75fb3 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -816,6 +816,7 @@ public Response continueTask() { .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) .setWriteType(S3RestUtils.getS3WriteType()) + .setOverwrite(true) .build(); return createObject(objectPath, userFs, filePOptions, auditContext); } From a6f3393bbf91ed2dca100d61158d11aa4f7f5ebc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Mar 2023 14:46:29 -0800 Subject: [PATCH 169/334] Bump golang.org/x/text to 0.3.8 in integration/kubernetes/operator Bumps [golang.org/x/text](https://github.com/golang/text) from 0.3.1-0.20181227161524-e6919f6577db to 0.3.8.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/text&package-manager=go_modules&previous-version=0.3.1-0.20181227161524-e6919f6577db&new-version=0.3.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#17015 change-id: cid-766310c345780786d08582b8b424d6d60c51d635 --- integration/kubernetes/operator/alluxio/go.mod | 4 ++-- integration/kubernetes/operator/alluxio/go.sum | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/integration/kubernetes/operator/alluxio/go.mod b/integration/kubernetes/operator/alluxio/go.mod index c8e4f2874310..08b3d2840353 100644 --- a/integration/kubernetes/operator/alluxio/go.mod +++ b/integration/kubernetes/operator/alluxio/go.mod @@ -48,8 +48,8 @@ require ( golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc // indirect golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect - golang.org/x/sys v0.0.0-20190312061237-fead79001313 // indirect - golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/text v0.3.8 // indirect golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 // indirect gomodules.xyz/jsonpatch/v2 v2.0.1 // indirect diff --git a/integration/kubernetes/operator/alluxio/go.sum b/integration/kubernetes/operator/alluxio/go.sum index d54512208772..1f4426dfc8a5 100644 --- a/integration/kubernetes/operator/alluxio/go.sum +++ b/integration/kubernetes/operator/alluxio/go.sum @@ -319,11 +319,13 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181004145325-8469e314837c/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313 h1:pczuHS43Cp2ktBEEmLwScxgjWsBSzdaQiKzUyf3DTTc= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db h1:6/JqlYfC1CCaLnGceQTI+sDGhC9UBSPAsBqI0Gun6kU= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From bb2eb3a65defb5fa55f0e0e8806ab1e1157cbe93 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Mon, 6 Mar 2023 14:59:46 -0800 Subject: [PATCH 170/334] Add new line at EOF Add new line at end of file pr-link: Alluxio/alluxio#17018 change-id: cid-2626068ed3e135fb6feb49dc59d35bfc9f4b3775 --- integration/metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/metrics/README.md b/integration/metrics/README.md index c736e2395bfd..006cefabe543 100644 --- a/integration/metrics/README.md +++ b/integration/metrics/README.md @@ -42,4 +42,4 @@ bin/alluxio copyDir conf ``` docker-compose -f docker-compose-master.yaml down docker-compose -f docker-compose-worker.yaml down -``` \ No newline at end of file +``` From f9d4ad86cc07fee984b03ffc4ae9f3c8c162f089 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Mar 2023 15:14:11 -0800 
Subject: [PATCH 171/334] Bump golang.org/x/crypto to 0.1.0 in /integration/kubernetes/operator/ Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.0.0-20190308221718-c2843e01d9a2 to 0.1.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/crypto&package-manager=go_modules&previous-version=0.0.0-20190308221718-c2843e01d9a2&new-version=0.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#17012 change-id: cid-cd8603258c629cae81b5e1ceefe2643aa995e320 --- integration/kubernetes/operator/alluxio/go.mod | 9 +++++---- integration/kubernetes/operator/alluxio/go.sum | 16 ++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/integration/kubernetes/operator/alluxio/go.mod b/integration/kubernetes/operator/alluxio/go.mod index 08b3d2840353..ce4c1a708714 100644 --- a/integration/kubernetes/operator/alluxio/go.mod +++ b/integration/kubernetes/operator/alluxio/go.mod @@ -45,11 +45,12 @@ require ( github.com/spf13/pflag v1.0.2 // indirect go.uber.org/atomic v1.3.2 // indirect go.uber.org/multierr v1.1.0 // indirect - golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 // indirect - golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc // indirect + golang.org/x/crypto v0.1.0 // indirect + golang.org/x/net v0.1.0 // indirect golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect - golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect - golang.org/x/text v0.3.8 // indirect + golang.org/x/sys v0.1.0 // indirect + golang.org/x/term v0.1.0 // indirect + golang.org/x/text v0.4.0 // indirect golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 // indirect gomodules.xyz/jsonpatch/v2 v2.0.1 // indirect diff --git a/integration/kubernetes/operator/alluxio/go.sum b/integration/kubernetes/operator/alluxio/go.sum index 1f4426dfc8a5..d0a4bef6fac1 100644 --- a/integration/kubernetes/operator/alluxio/go.sum +++ b/integration/kubernetes/operator/alluxio/go.sum @@ -293,8 +293,9 @@ go.uber.org/zap v1.9.1 h1:XCJQEf3W6eZaVwhRBof6ImoYGJSITeKWsyeh3HFu/5o= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181025213731-e84da0312774/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= +golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190312203227-4b39c73a6495/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= @@ -307,8 +308,9 @@ golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc h1:gkKoSkUmnU6bpS/VhkuO27bzQeSA51uaEfbOW5dNb68= golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -320,12 +322,14 @@ golang.org/x/sys v0.0.0-20181004145325-8469e314837c/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys 
v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From b75a7d31dc41a96e1853995dedea4df0672f50fb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Mar 2023 15:18:26 -0800 Subject: [PATCH 172/334] Bump golang.org/x/net to 0.7.0 in /integration/kubernetes/operator/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit [//]: # (dependabot-start) ⚠️ **Dependabot is rebasing this PR** ⚠️ Rebasing might not happen immediately, so don't worry if this takes some time. Note: if you make any changes to this PR yourself, they will take precedence over the rebase. --- [//]: # (dependabot-end) Bumps [golang.org/x/net](https://github.com/golang/net) from 0.0.0-20190812203447-cdfb69ac37fc to 0.7.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/net&package-manager=go_modules&previous-version=0.0.0-20190812203447-cdfb69ac37fc&new-version=0.7.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) - `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language - `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language - `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language - `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/Alluxio/alluxio/network/alerts).
pr-link: Alluxio/alluxio#17013 change-id: cid-841f1e6859951f39642f32b2be8bb4928529b004 --- integration/kubernetes/operator/alluxio/go.mod | 8 ++++---- integration/kubernetes/operator/alluxio/go.sum | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/integration/kubernetes/operator/alluxio/go.mod b/integration/kubernetes/operator/alluxio/go.mod index ce4c1a708714..dd7839830237 100644 --- a/integration/kubernetes/operator/alluxio/go.mod +++ b/integration/kubernetes/operator/alluxio/go.mod @@ -46,11 +46,11 @@ require ( go.uber.org/atomic v1.3.2 // indirect go.uber.org/multierr v1.1.0 // indirect golang.org/x/crypto v0.1.0 // indirect - golang.org/x/net v0.1.0 // indirect + golang.org/x/net v0.7.0 // indirect golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect - golang.org/x/sys v0.1.0 // indirect - golang.org/x/term v0.1.0 // indirect - golang.org/x/text v0.4.0 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/term v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 // indirect golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 // indirect gomodules.xyz/jsonpatch/v2 v2.0.1 // indirect diff --git a/integration/kubernetes/operator/alluxio/go.sum b/integration/kubernetes/operator/alluxio/go.sum index d0a4bef6fac1..a8d08c52a1f0 100644 --- a/integration/kubernetes/operator/alluxio/go.sum +++ b/integration/kubernetes/operator/alluxio/go.sum @@ -309,8 +309,8 @@ golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= -golang.org/x/net 
v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -322,14 +322,14 @@ golang.org/x/sys v0.0.0-20181004145325-8469e314837c/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= -golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= -golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= 
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 0645ea804b1b83f457fcd7288abfe43392952116 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Tue, 7 Mar 2023 14:24:27 +0800 Subject: [PATCH 173/334] Cache Block Location to save memory ### What changes are proposed in this pull request? Cache block location object to reduce memory consumption ### Why are the changes needed? Number of possible Block location value is limited, up to 9 * number of workers. By caching it, memory consumption can be saved. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#16953 change-id: cid-9464ad2e70284b50d4c8a05dd5138f35e78bcb2d --- .../util/proto/BlockLocationUtils.java | 87 +++++++++++++++++++ .../util/proto/BlockLocationUtilsTest.java | 49 +++++++++++ .../master/block/DefaultBlockMaster.java | 20 ++--- .../stress/cli/RpcBenchPreparationUtils.java | 3 +- 4 files changed, 147 insertions(+), 12 deletions(-) create mode 100644 core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java create mode 100644 core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java diff --git a/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java new file mode 100644 index 000000000000..d53ec25fda47 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java @@ -0,0 +1,87 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.proto; + +import alluxio.collections.IndexDefinition; +import alluxio.collections.IndexedSet; +import alluxio.proto.meta.Block.BlockLocation; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +import java.util.Set; + +/** + * An util class to create cached grpc block locations. + */ +public class BlockLocationUtils { + private static final IndexDefinition OBJECT_INDEX = + IndexDefinition.ofUnique((b) -> b); + + private static final IndexDefinition WORKER_ID_INDEX = + IndexDefinition.ofNonUnique(BlockLocation::getWorkerId); + + // TODO(maobaolong): Add a metric to monitor the size of mLocationCacheMap + private static final IndexedSet BLOCK_LOCATION_CACHE = + new IndexedSet<>(OBJECT_INDEX, WORKER_ID_INDEX); + + private static final Set VALID_MEDIUM_TYPE_VALUES = + Sets.newHashSet("MEM", "HDD", "SSD"); + + /** + * Get a shared grpc block location object. If it does not exist, create and cache it. + * Because the valid values of tierAlias and mediumType are only MEM, SSD and HDD, + * The size of the cache map is limited. 
+ * + * @param workerId the worker id + * @param tierAlias the tier alias + * @param mediumType the medium type + * @return a shared block location object from the cache + */ + public static BlockLocation getCached( + long workerId, String tierAlias, String mediumType) { + BlockLocation location = BlockLocation + .newBuilder() + .setWorkerId(workerId) + .setTier(tierAlias) + .setMediumType(mediumType) + .build(); + return getCached(location); + } + + /** + * Get a shared grpc block location object. If it does not exist, create and cache it. + * Because the valid values of tierAlias and mediumType are only MEM, SSD and HDD, + * The size of the cache map is limited. + * + * @param blockLocation the block location to cache + * @return a shared block location object from the cache + */ + public static BlockLocation getCached(BlockLocation blockLocation) { + Preconditions.checkState(VALID_MEDIUM_TYPE_VALUES.contains(blockLocation.getTier()), + "TierAlias must be one of {MEM, HDD and SSD} but got %s", + blockLocation.getTier()); + Preconditions.checkState(VALID_MEDIUM_TYPE_VALUES.contains(blockLocation.getMediumType()), + "MediumType must be one of {MEM, HDD and SSD} but got %s", + blockLocation.getMediumType()); + BLOCK_LOCATION_CACHE.add(blockLocation); + return BLOCK_LOCATION_CACHE.getFirstByField(OBJECT_INDEX, blockLocation); + } + + /** + * Evict cache entries by worker id. + * @param workerId the worker id + */ + public static void evictByWorkerId(long workerId) { + BLOCK_LOCATION_CACHE.removeByField(WORKER_ID_INDEX, workerId); + } +} diff --git a/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java b/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java new file mode 100644 index 000000000000..2655aee3e36d --- /dev/null +++ b/core/common/src/test/java/alluxio/util/proto/BlockLocationUtilsTest.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.proto; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; + +import alluxio.proto.meta.Block.BlockLocation; + +import org.junit.Test; + +public class BlockLocationUtilsTest { + @Test + public void testBlockLocationCached() { + BlockLocation location1 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertEquals("HDD", location1.getTier()); + assertEquals("SSD", location1.getMediumType()); + assertEquals(1, location1.getWorkerId()); + + BlockLocation location2 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertSame(location1, location2); + assertEquals(location1, location2); + + BlockLocation location3 = BlockLocationUtils.getCached(location2); + assertSame(location1, location3); + assertEquals(location1, location3); + + BlockLocationUtils.evictByWorkerId(1); + + BlockLocation location4 = BlockLocationUtils.getCached(1, "HDD", "SSD"); + assertNotSame(location1, location4); + assertEquals(location1, location4); + } + + @Test(expected = IllegalStateException.class) + public void testInvalidValue() { + BlockLocationUtils.getCached(1, "INVALID", "SSD"); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index e00f8685443d..02582147d329 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -73,6 +73,7 @@ 
import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.executor.ExecutorServiceFactory; import alluxio.util.network.NetworkAddressUtils; +import alluxio.util.proto.BlockLocationUtils; import alluxio.wire.Address; import alluxio.wire.BlockInfo; import alluxio.wire.RegisterLease; @@ -404,11 +405,10 @@ public boolean processJournalEntry(JournalEntry entry) { return true; } // The master is running and the journal is from an existing worker - mBlockMetaStore.addLocation(blockInfoEntry.getBlockId(), BlockLocation.newBuilder() - .setWorkerId(workerId) - .setTier(blockLocation.getTierAlias()) - .setMediumType(blockLocation.getMediumType()) - .build()); + mBlockMetaStore.addLocation(blockInfoEntry.getBlockId(), BlockLocationUtils.getCached( + workerId, blockLocation.getTierAlias(), blockLocation.getMediumType()) + ); + worker.addBlock(blockInfoEntry.getBlockId()); LOG.debug("Added BlockLocation for {} to worker {}", blockInfoEntry.getBlockId(), workerId); } @@ -983,11 +983,8 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, } } // Update the block metadata with the new worker location. - mBlockMetaStore.addLocation(blockId, BlockLocation.newBuilder() - .setWorkerId(workerId) - .setTier(tierAlias) - .setMediumType(mediumType) - .build()); + mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached( + workerId, tierAlias, mediumType)); // This worker has this block, so it is no longer lost. 
mLostBlocks.remove(blockId); @@ -1543,7 +1540,7 @@ private void processWorkerAddedBlocks(MasterWorkerInfo workerInfo, Preconditions.checkState(location.getWorkerId() == workerInfo.getId(), "BlockLocation has a different workerId %s from the request sender's workerId %s", location.getWorkerId(), workerInfo.getId()); - mBlockMetaStore.addLocation(blockId, location); + mBlockMetaStore.addLocation(blockId, BlockLocationUtils.getCached(location)); mLostBlocks.remove(blockId); } else { invalidBlockCount++; @@ -1751,6 +1748,7 @@ private void processLostWorker(MasterWorkerInfo worker) { // mark these blocks to-remove from the worker. // So if the worker comes back again the blocks are kept. processWorkerRemovedBlocks(worker, worker.getBlocks(), false); + BlockLocationUtils.evictByWorkerId(worker.getId()); } private void deleteWorkerMetadata(MasterWorkerInfo worker) { diff --git a/stress/shell/src/main/java/alluxio/stress/cli/RpcBenchPreparationUtils.java b/stress/shell/src/main/java/alluxio/stress/cli/RpcBenchPreparationUtils.java index 8a362fece565..e3f9269593fc 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/RpcBenchPreparationUtils.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/RpcBenchPreparationUtils.java @@ -181,7 +181,8 @@ public static Map> generateBlockIdOnTiers( for (int i = 0; i < dirConfigs.size(); i++) { int dirNumBlocks = dirConfigs.get(i); LOG.info("Found dir on tier {} with {} blocks", tierConfig.getKey(), dirNumBlocks); - BlockStoreLocation loc = new BlockStoreLocation(tierConfig.getKey().toString(), i); + BlockStoreLocation loc = new BlockStoreLocation( + tierConfig.getKey().toString(), i, tierConfig.getKey().toString()); List blockIds = generateDecreasingNumbers(blockIdStart, dirNumBlocks); blockMap.put(loc, blockIds); blockIdStart -= dirNumBlocks; From 7d71892c2d4a36a3a256eab180165ff5f38248f1 Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Tue, 7 Mar 2023 16:18:28 +0800 Subject: [PATCH 174/334] Fix jackson 
CVE-2022-42003 ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17000 change-id: cid-d63519db59fbec5065f93971219930edc90815c5 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 60e7bafd3f16..e2f332c1d115 100644 --- a/pom.xml +++ b/pom.xml @@ -153,7 +153,7 @@ UTF-8 ${git.commit.time} 1.7.30 - 2.13.3 + 2.13.5 3.1.0-5.8.5 5.6.19 2 From 0fd418ad6ebee54fe4bbcb9f21fec9669d3a5403 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Wed, 8 Mar 2023 13:44:34 +0800 Subject: [PATCH 175/334] [SMALLFIX] Add a missing mediumType in test mocking worker register NA pr-link: Alluxio/alluxio#17033 change-id: cid-661dd0421bd1cfb13d30b6442ed8989f787f00bb --- .../test/java/alluxio/server/block/RegisterStreamTestUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java b/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java index e66d22f94ee4..e011b42a5282 100644 --- a/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java +++ b/tests/src/test/java/alluxio/server/block/RegisterStreamTestUtils.java @@ -108,7 +108,7 @@ public static List generateRegisterStreamForWorkerWithTi public static List generateRegisterStreamForWorkerWithBlocks( long workerId, long blockSize, List blockList) { Map> blockMap = new HashMap<>(); - BlockStoreLocation mem = new BlockStoreLocation("MEM", 0); + BlockStoreLocation mem = new BlockStoreLocation("MEM", 0, "MEM"); 
blockMap.put(mem, blockList); // We just use the RegisterStreamer to generate the batch of requests From c9ed6344446457100b6da0965a96ddbcd870f140 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Thu, 9 Mar 2023 00:36:01 +0800 Subject: [PATCH 176/334] Remove HdfsUnderFileSystemFactory from service loading in COSN UFS jar ### What changes are proposed in this pull request? Remove HdfsUnderFileSystemFactory from service loading in COSN UFS jar. ### Why are the changes needed? Avoid loading HdfsUnderFileSystemFactory from alluxio-underfs-cosn-*.jar. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#17024 change-id: cid-ed0eeacd667ac1b0e14d95a0c5b1034416acfbea --- underfs/cosn/pom.xml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/underfs/cosn/pom.xml b/underfs/cosn/pom.xml index c8c2611d35c8..fb9927f24074 100644 --- a/underfs/cosn/pom.xml +++ b/underfs/cosn/pom.xml @@ -69,6 +69,37 @@ org.apache.maven.plugins maven-shade-plugin + + + shade + package + + shade + + + + + *:* + + LICENSE + META-INF/LICENSE + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + alluxio/underfs/hdfs/HdfsUnderFileSystemFactory.* + + + + org.alluxio:alluxio-underfs-hdfs + + META-INF/services/alluxio.underfs.UnderFileSystemFactory + + + + + + com.coderplus.maven.plugins From ba0ce14729ffb97e5fbf4844ecbd3796e37a4daa Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Wed, 8 Mar 2023 16:05:18 -0800 Subject: [PATCH 177/334] [DOCFIX] Update tensorflow script link Move the tensorflow script from personal github account to Alluxio. 
pr-link: Alluxio/alluxio#17030 change-id: cid-96c8251b31270e03190b5f0d2a0cf4459e096e5d --- docs/en/compute/Tensorflow.md | 4 +- docs/resources/mnist_classification.py | 102 +++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 docs/resources/mnist_classification.py diff --git a/docs/en/compute/Tensorflow.md b/docs/en/compute/Tensorflow.md index 4ac7002e425f..8d3c3c22f42c 100644 --- a/docs/en/compute/Tensorflow.md +++ b/docs/en/compute/Tensorflow.md @@ -118,11 +118,11 @@ total 0 ### Run image recognition test -Download the [image recognition script](https://github.com/ssz1997/AlluxioFuseTensorflowExample/blob/main/mnist_test.py) +Download the [image recognition script](https://github.com/Alluxio/alluxio/tree/master/docs/resources/mnist_classification.py) and run it with the training data `/mnt/fuse/mnist.npz`. ```shell -$ curl -o mnist_test.py -L https://github.com/ssz1997/AlluxioFuseTensorflowExample/blob/main/mnist_test.py?raw=true +$ curl -o mnist_test.py -L https://github.com/Alluxio/alluxio/tree/master/docs/resources/mnist_classification.py?raw=true $ python3 mnist_test.py /mnt/fuse/mnist.npz ``` diff --git a/docs/resources/mnist_classification.py b/docs/resources/mnist_classification.py new file mode 100644 index 000000000000..8cd5eafe6065 --- /dev/null +++ b/docs/resources/mnist_classification.py @@ -0,0 +1,102 @@ +""" +This code is modified from https://www.tensorflow.org/tutorials/quickstart/advanced, which is under Apache 2.0 License. 
+""" + +import numpy as np +import sys +import tensorflow as tf +print("TensorFlow version:", tf.__version__) + +from tensorflow.keras.layers import Dense, Flatten, Conv2D +from tensorflow.keras import Model + +if len(sys.argv) < 2: + print("Please provide the dataset") + sys.exit() + +with np.load(sys.argv[1]) as f: + x_train, y_train = f['x_train'], f['y_train'] + x_test, y_test = f['x_test'], f['y_test'] +x_train, x_test = x_train / 255.0, x_test / 255.0 + +# Add a channels dimension +x_train = x_train[..., tf.newaxis].astype("float32") +x_test = x_test[..., tf.newaxis].astype("float32") + +train_ds = tf.data.Dataset.from_tensor_slices( + (x_train, y_train)).shuffle(10000).batch(32) + +test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32) + +class MyModel(Model): + def __init__(self): + super(MyModel, self).__init__() + self.conv1 = Conv2D(32, 3, activation='relu') + self.flatten = Flatten() + self.d1 = Dense(128, activation='relu') + self.d2 = Dense(10) + + def call(self, x): + x = self.conv1(x) + x = self.flatten(x) + x = self.d1(x) + return self.d2(x) + +# Create an instance of the model +model = MyModel() + +loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +optimizer = tf.keras.optimizers.Adam() + +train_loss = tf.keras.metrics.Mean(name='train_loss') +train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') + +test_loss = tf.keras.metrics.Mean(name='test_loss') +test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy') + +@tf.function +def train_step(images, labels): + with tf.GradientTape() as tape: + # training=True is only needed if there are layers with different + # behavior during training versus inference (e.g. Dropout). 
+ predictions = model(images, training=True) + loss = loss_object(labels, predictions) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + train_loss(loss) + train_accuracy(labels, predictions) + +@tf.function +def test_step(images, labels): + # training=False is only needed if there are layers with different + # behavior during training versus inference (e.g. Dropout). + predictions = model(images, training=False) + t_loss = loss_object(labels, predictions) + + test_loss(t_loss) + test_accuracy(labels, predictions) + +EPOCHS = 5 + +for epoch in range(EPOCHS): + # Reset the metrics at the start of the next epoch + train_loss.reset_states() + train_accuracy.reset_states() + test_loss.reset_states() + test_accuracy.reset_states() + + for images, labels in train_ds: + train_step(images, labels) + + for test_images, test_labels in test_ds: + test_step(test_images, test_labels) + + print( + f'Epoch {epoch + 1}, ' + f'Loss: {train_loss.result()}, ' + f'Accuracy: {train_accuracy.result() * 100}, ' + f'Test Loss: {test_loss.result()}, ' + f'Test Accuracy: {test_accuracy.result() * 100}' + ) From ce543363e806dfc3dc02acc01fe2f4a20347969f Mon Sep 17 00:00:00 2001 From: ssyssy Date: Wed, 8 Mar 2023 20:58:19 -0800 Subject: [PATCH 178/334] Set ttlaction back to free ### What changes are proposed in this pull request? Set the ttlAction back to free to fix the backward compatibility issue from our daily sanity test. ### Why are the changes needed? Fix the backward compatibility proto issue from a previous PR https://github.com/Alluxio/alluxio/pull/16823. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#17039 change-id: cid-887a9d5ddfe7f4f9e0b929a83f529278b1d9bd90 --- .../file/options/OutStreamOptionsTest.java | 2 +- .../main/java/alluxio/conf/PropertyKey.java | 4 +-- .../alluxio/master/file/InodeTtlChecker.java | 4 +-- .../master/file/FileSystemMasterTest.java | 34 +++++++++++++------ .../fs/FileSystemMasterIntegrationTest.java | 10 ++++-- .../alluxio/client/fs/TtlIntegrationTest.java | 11 +++--- 6 files changed, 42 insertions(+), 23 deletions(-) diff --git a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java index 839de3facc89..a8726f5c2b64 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/options/OutStreamOptionsTest.java @@ -102,7 +102,7 @@ public void defaults() throws IOException { assertEquals(ModeUtils.applyFileUMask(Mode.defaults(), mConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK)), options.getMode()); assertEquals(Constants.NO_TTL, options.getCommonOptions().getTtl()); - assertEquals(TtlAction.DELETE_ALLUXIO, options.getCommonOptions().getTtlAction()); + assertEquals(TtlAction.FREE, options.getCommonOptions().getTtlAction()); assertEquals(ufsType, options.getUnderStorageType()); assertEquals(WriteType.CACHE_THROUGH, options.getWriteType()); assertEquals(Constants.LAST_TIER, options.getWriteTier()); diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index dfba5626fe20..fa645e2411a7 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5772,9 +5772,9 @@ public String toString() { .build(); public static final PropertyKey USER_FILE_CREATE_TTL_ACTION = enumBuilder(Name.USER_FILE_CREATE_TTL_ACTION, TtlAction.class) - 
.setDefaultValue(TtlAction.DELETE_ALLUXIO) + .setDefaultValue(TtlAction.FREE) .setDescription("When file's ttl is expired, the action performs on it. Options: " - + "DELETE_ALLUXIO(default), FREE or DELETE") + + "FREE(default), DELETE_ALLUXIO or DELETE") .setScope(Scope.CLIENT) .build(); public static final PropertyKey USER_FILE_UFS_TIER_ENABLED = diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java index c1f467b21848..e3c6978627af 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java @@ -84,7 +84,7 @@ public void heartbeat() throws InterruptedException { TtlAction ttlAction = inode.getTtlAction(); LOG.info("Path {} TTL has expired, performing action {}", path.getPath(), ttlAction); switch (ttlAction) { - case FREE: + case FREE: // Default: FREE // public free method will lock the path, and check WRITE permission required at // parent of file if (inode.isDirectory()) { @@ -114,7 +114,7 @@ public void heartbeat() throws InterruptedException { mFileSystemMaster.delete(path, DeleteContext.defaults()); } break; - case DELETE_ALLUXIO: // Default: DELETE_ALLUXIO + case DELETE_ALLUXIO: // public delete method will lock the path, and check WRITE permission required at // parent of file if (inode.isDirectory()) { diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java index 4ac3bef41ae1..3b767b278ed3 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java @@ -625,7 +625,10 @@ public void ttlDirectoryDelete() throws Exception { FileInfo fileInfo = mFileSystemMaster.getFileInfo(dirId); assertEquals(fileInfo.getFileId(), 
dirId); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the directory should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(dirId); } @@ -646,7 +649,10 @@ public void ttlDirectoryDeleteReplay() throws Exception { FileInfo fileInfo = mFileSystemMaster.getFileInfo(dirId); assertEquals(fileInfo.getFileId(), dirId); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the directory should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(dirId); } @@ -764,8 +770,10 @@ public void setTtlForFileWithNoTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(fileId); } @@ -791,8 +799,10 @@ public void setTtlForDirectoryWithNoTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file should have been deleted during last TTL check. 
- mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); mFileSystemMaster.getFileInfo(NESTED_DIR_URI, GET_STATUS_CONTEXT); mFileSystemMaster.getFileInfo(NESTED_FILE_URI, GET_STATUS_CONTEXT); @@ -817,8 +827,10 @@ public void setSmallerTtlForFileWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. + assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(fileId); } @@ -840,8 +852,10 @@ public void setSmallerTtlForDirectoryWithTtl() throws Exception { SetAttributeContext.mergeFrom(SetAttributePOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(0)))); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - // TTL is set to 0, the file should have been deleted during last TTL check. - mThrown.expect(FileDoesNotExistException.class); + // TTL is set to 0, the file should have been freed during last TTL check. 
+ assertEquals(0, + mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT) + .getInAlluxioPercentage()); mFileSystemMaster.getFileInfo(NESTED_URI, GET_STATUS_CONTEXT); } diff --git a/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java index 9b8c1d33e9c6..e1c49aface14 100644 --- a/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/FileSystemMasterIntegrationTest.java @@ -187,7 +187,7 @@ public void clientFileInfoEmptyFile() throws Exception { assertFalse(fileInfo.isPersisted()); assertFalse(fileInfo.isPinned()); Assert.assertEquals(Constants.NO_TTL, fileInfo.getTtl()); - Assert.assertEquals(TtlAction.DELETE_ALLUXIO, fileInfo.getTtlAction()); + Assert.assertEquals(TtlAction.FREE, fileInfo.getTtlAction()); Assert.assertEquals(TEST_USER, fileInfo.getOwner()); Assert.assertEquals(0644, (short) fileInfo.getMode()); } @@ -669,8 +669,12 @@ public void ttlExpiredCreateFile() throws Exception { // Sleep for the ttl expiration. 
CommonUtils.sleepMs(2 * TTL_CHECKER_INTERVAL_MS); HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); - mThrown.expect(FileDoesNotExistException.class); - mFsMaster.getFileInfo(fileId); + HeartbeatScheduler.await(HeartbeatContext.MASTER_TTL_CHECK, 10, TimeUnit.SECONDS); + HeartbeatScheduler.schedule(HeartbeatContext.MASTER_TTL_CHECK); + HeartbeatScheduler.await(HeartbeatContext.MASTER_TTL_CHECK, 10, TimeUnit.SECONDS); + FileInfo fileInfo = mFsMaster.getFileInfo(fileId); + Assert.assertEquals(Constants.NO_TTL, fileInfo.getTtl()); + Assert.assertEquals(TtlAction.DELETE, fileInfo.getTtlAction()); } @Test diff --git a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java index 03b5935348c6..b4cd881477d3 100644 --- a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java @@ -91,8 +91,9 @@ public void expireManyAfterDeleteAlluxio() throws Exception { // Only the even-index files should expire. long ttl = i % 2 == 0 ? TTL_INTERVAL_MS / 2 : TTL_INTERVAL_MS * 1000; mOutStream = mFileSystem.createFile(files[i], - CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl)).build()); + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl) + .setTtlAction(TtlAction.DELETE_ALLUXIO)).build()); mOutStream.write(mBuffer, 0, 10); mOutStream.close(); @@ -130,9 +131,9 @@ public void expireManyAfterFree() throws Exception { // Only the even-index files should expire. long ttl = i % 2 == 0 ? 
TTL_INTERVAL_MS / 2 : TTL_INTERVAL_MS * 1000; mOutStream = mFileSystem.createFile(files[i], - CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl) - .setTtlAction(TtlAction.FREE)).build()); + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl)) + .build()); mOutStream.write(mBuffer, 0, 10); mOutStream.close(); From 5425c29020013f993351c04ffa9cc6965221ccc2 Mon Sep 17 00:00:00 2001 From: humengyu Date: Thu, 9 Mar 2023 13:45:13 +0800 Subject: [PATCH 179/334] Fix fuse metadata command ClassCastException ### What changes are proposed in this pull request? 1. Add a method `getUnderlyingFileSystem()` in DelegatingFileSystem to get underlying FileSystem; 2. Add judgment logic to the fileSystem in AbstractMetadataCacheSubCommand. ### Why are the changes needed? https://github.com/Alluxio/alluxio/issues/17029 ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs? false 2. addition or removal of property keys? false 3. webui?
false pr-link: Alluxio/alluxio#17035 change-id: cid-a60c68574b05e7db7676c14f54f03ce5a9858b72 --- .../client/file/DelegatingFileSystem.java | 7 +++++ .../AbstractMetadataCacheSubCommand.java | 28 ++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index 58ca4b70fa3f..abcdd8ca5fc7 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -264,4 +264,11 @@ public String getLoadProgress(AlluxioURI path, public void close() throws IOException { mDelegatedFileSystem.close(); } + + /** + * @return the underlying fileSystem + */ + public FileSystem getUnderlyingFileSystem() { + return mDelegatedFileSystem; + } } diff --git a/integration/fuse/src/main/java/alluxio/cli/command/metadatacache/AbstractMetadataCacheSubCommand.java b/integration/fuse/src/main/java/alluxio/cli/command/metadatacache/AbstractMetadataCacheSubCommand.java index 1e78590f542c..1444ca554b1b 100644 --- a/integration/fuse/src/main/java/alluxio/cli/command/metadatacache/AbstractMetadataCacheSubCommand.java +++ b/integration/fuse/src/main/java/alluxio/cli/command/metadatacache/AbstractMetadataCacheSubCommand.java @@ -16,6 +16,7 @@ import alluxio.client.file.FileSystem; import alluxio.client.file.MetadataCachingFileSystem; import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.LocalCacheFileSystem; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; import alluxio.exception.runtime.InvalidArgumentRuntimeException; @@ -43,7 +44,7 @@ public URIStatus run(AlluxioURI path, String[] argv) throws InvalidArgumentExcep + "not supported when %s is false", getCommandName(), PropertyKey.USER_METADATA_CACHE_ENABLED.getName())); } - return runSubCommand(path, argv, (MetadataCachingFileSystem) 
mFileSystem); + return runSubCommand(path, argv, findMetadataCachingFileSystem()); } /** @@ -53,4 +54,29 @@ public URIStatus run(AlluxioURI path, String[] argv) throws InvalidArgumentExcep */ protected abstract URIStatus runSubCommand(AlluxioURI path, String[] argv, MetadataCachingFileSystem fs); + + /** + * Find MetadataCachingFileSystem by given filesystem. + */ + private MetadataCachingFileSystem findMetadataCachingFileSystem() { + if (mFileSystem instanceof MetadataCachingFileSystem) { + return (MetadataCachingFileSystem) mFileSystem; + } + if (mFileSystem instanceof LocalCacheFileSystem) { + FileSystem underlyingFileSystem = ((LocalCacheFileSystem) mFileSystem) + .getUnderlyingFileSystem(); + if (underlyingFileSystem instanceof MetadataCachingFileSystem) { + return (MetadataCachingFileSystem) underlyingFileSystem; + } else { + throw new IllegalStateException( + "The expected underlying FileSystem of LocalCacheFileSystem " + + "is MetadataCachingFileSystem, but found " + + mFileSystem.getClass().getSimpleName()); + } + } + throw new IllegalStateException( + String.format("The expected FileSystem is %s or %s, but found %s", + MetadataCachingFileSystem.class.getSimpleName(), + LocalCacheFileSystem.class.getSimpleName(), mFileSystem.getClass().getSimpleName())); + } } From 1c326ec29ef892970b529fb755ed7f2cad8491c3 Mon Sep 17 00:00:00 2001 From: Huang Hua Date: Thu, 9 Mar 2023 15:41:41 +0800 Subject: [PATCH 180/334] Fix UT of alluxio.master.transport.GrpcMessagingTransportTest The alluxio.master.transport.GrpcMessagingTransportTest.testServerClosed fails randomly. This test may fail at different reasons and the following expcetions may be thrown out: IllegalStateException or io.grpc.StatusRuntimeException. ### What changes are proposed in this pull request? Change the expected Exceptions in assertion. ### Why are the changes needed? This test fails like: ``` Error: 8.418 [ERROR] Tests run: 5, Failures: 2, Errors: 0, Skipped: 0, Time elapsed: 1.214 s <<< FAILURE! 
- in alluxio.master.transport.GrpcMessagingTransportTest [11033](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11034) Error: 8.425 [ERROR] alluxio.master.transport.GrpcMessagingTransportTest.testServerClosed Time elapsed: 1.004 s <<< FAILURE! [11034](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11035) java.lang.AssertionError [11035](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11036) at org.junit.Assert.fail(Assert.java:87) [11036](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11037) at org.junit.Assert.assertTrue(Assert.java:42) [11037](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11038) at org.junit.Assert.assertTrue(Assert.java:53) [11038](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11039) at alluxio.master.transport.GrpcMessagingTransportTest.testServerClosed(GrpcMessagingTransportTest.java:153) [11039](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11040) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) [11040](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11041) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) [11041](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11042) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) [11042](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11043) at java.lang.reflect.Method.invoke(Method.java:498) [11043](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11044) at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59) [11044](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11045) at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) [11045](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11046) at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56) [11046](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11047) at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) [11047](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11048) at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) [11048](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11049) at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) [11049](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11050) at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) [11050](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11051) at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100) [11051](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11052) at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366) [11052](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11053) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103) [11053](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11054) at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63) [11054](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11055) at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331) [11055](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11056) at 
org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79) [11056](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11057) at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329) [11057](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11058) at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66) [11058](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11059) at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293) [11059](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11060) at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306) [11060](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11061) at org.junit.runners.ParentRunner.run(ParentRunner.java:413) [11061](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11062) at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:364) [11062](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11063) at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:272) [11063](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11064) at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:237) [11064](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11065) at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:158) [11065](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11066) at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:428) [11066](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11067) at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162) 
[11067](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11068) at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:562) [11068](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11069) at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:548) [11069](https://github.com/Alluxio/alluxio/actions/runs/4360970594/jobs/7624399342#step:6:11070) ``` It fails because the Exceptions returned are different from what's expected. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#17034 change-id: cid-79b839a09e8bb8ce828108eaf353793435089842 --- .../alluxio/master/transport/GrpcMessagingTransportTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java b/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java index b08e8aec7186..2bfee6a1f9b3 100644 --- a/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java +++ b/core/server/common/src/test/java/alluxio/master/transport/GrpcMessagingTransportTest.java @@ -18,6 +18,7 @@ import io.atomix.catalyst.buffer.BufferOutput; import io.atomix.catalyst.serializer.CatalystSerializable; import io.atomix.catalyst.serializer.Serializer; +import io.grpc.StatusRuntimeException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -150,7 +151,8 @@ public void testServerClosed() throws Exception { try { sendRequest(clientConnection, new DummyRequest("dummy")).get(); } catch (ExecutionException e) { - Assert.assertTrue(e.getCause() instanceof IllegalStateException); + Assert.assertTrue(e.getCause() instanceof IllegalStateException + || e.getCause() instanceof StatusRuntimeException); failed = true; } Assert.assertTrue(failed); From 2766440e3e35cbb406beb08113d161a3ceeb9809 Mon Sep 17 00:00:00 2001 From: tian bao 
<2011xuesong@gmail.com> Date: Thu, 9 Mar 2023 15:55:29 +0800 Subject: [PATCH 181/334] Support getBlockInfo excluding mount-related info ### What changes are proposed in this pull request? Like https://github.com/Alluxio/alluxio/pull/16893 and https://github.com/Alluxio/alluxio/pull/16944, Before blockinfo will use mountInfo, so it should not use mountInfo after set excludeMountInfo to true. This purpose is also to improve rpc performance with HDFS client. pr-link: Alluxio/alluxio#17006 change-id: cid-40e4961f7fbeee067d0b8ff1bc7d927d11b9745c --- .../master/file/DefaultFileSystemMaster.java | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index d0c58547503c..0fb78c18e7c6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1008,7 +1008,7 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter, List fileBlockInfos = new ArrayList<>(blockInfos.size()); for (BlockInfo blockInfo : blockInfos) { - fileBlockInfos.add(generateFileBlockInfo(inodePath, blockInfo)); + fileBlockInfos.add(generateFileBlockInfo(inodePath, blockInfo, excludeMountInfo)); } fileInfo.setFileBlockInfos(fileBlockInfos); } @@ -1028,7 +1028,7 @@ private FileInfo getFileInfoInternal(LockedInodePath inodePath, Counter counter, fileInfo.getBlockIds(), fileInfo.getLength(), fileInfo.getBlockSizeBytes(), null); // Reset file-block-info list with the new list. 
try { - fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath)); + fileInfo.setFileBlockInfos(getFileBlockInfoListInternal(inodePath, excludeMountInfo)); } catch (InvalidPathException e) { throw new FileDoesNotExistException( String.format("Hydration failed for file: %s", inodePath.getUri()), e); @@ -2380,7 +2380,7 @@ public List getFileBlockInfoList(AlluxioURI path) auditContext.setAllowed(false); throw e; } - List ret = getFileBlockInfoListInternal(inodePath); + List ret = getFileBlockInfoListInternal(inodePath, false); Metrics.FILE_BLOCK_INFOS_GOT.inc(); auditContext.setSucceeded(true); return ret; @@ -2389,16 +2389,18 @@ public List getFileBlockInfoList(AlluxioURI path) /** * @param inodePath the {@link LockedInodePath} to get the info for + * @param excludeMountInfo exclude the mount info * @return a list of {@link FileBlockInfo} for all the blocks of the given inode */ - private List getFileBlockInfoListInternal(LockedInodePath inodePath) + private List getFileBlockInfoListInternal(LockedInodePath inodePath, + boolean excludeMountInfo) throws InvalidPathException, FileDoesNotExistException, UnavailableException { InodeFile file = inodePath.getInodeFile(); List blockInfoList = mBlockMaster.getBlockInfoList(file.getBlockIds()); List ret = new ArrayList<>(blockInfoList.size()); for (BlockInfo blockInfo : blockInfoList) { - ret.add(generateFileBlockInfo(inodePath, blockInfo)); + ret.add(generateFileBlockInfo(inodePath, blockInfo, excludeMountInfo)); } return ret; } @@ -2409,9 +2411,11 @@ private List getFileBlockInfoListInternal(LockedInodePath inodePa * * @param inodePath the file the block is a part of * @param blockInfo the {@link BlockInfo} to generate the {@link FileBlockInfo} from + * @param excludeMountInfo exclude the mount info * @return a new {@link FileBlockInfo} for the block */ - private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo blockInfo) + private FileBlockInfo generateFileBlockInfo(LockedInodePath 
inodePath, BlockInfo blockInfo, + boolean excludeMountInfo) throws FileDoesNotExistException { InodeFile file = inodePath.getInodeFile(); FileBlockInfo fileBlockInfo = new FileBlockInfo(); @@ -2422,7 +2426,8 @@ private FileBlockInfo generateFileBlockInfo(LockedInodePath inodePath, BlockInfo long offset = file.getBlockSizeBytes() * BlockId.getSequenceNumber(blockInfo.getBlockId()); fileBlockInfo.setOffset(offset); - if (fileBlockInfo.getBlockInfo().getLocations().isEmpty() && file.isPersisted()) { + if (!excludeMountInfo && fileBlockInfo.getBlockInfo().getLocations().isEmpty() + && file.isPersisted()) { // No alluxio locations, but there is a checkpoint in the under storage system. Add the // locations from the under storage system. long blockId = fileBlockInfo.getBlockInfo().getBlockId(); From 952bd4ba4a629ca945cbb24144caa84a9a287411 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Thu, 9 Mar 2023 18:45:17 +0800 Subject: [PATCH 182/334] Merge journal context in LostFileDetector ### What changes are proposed in this pull request? `LostFileDetector` checks all files that are possibly LOST in Alluxio, meaning the data only resides in Alluxio (not PERSISTED) and now cannot be found(no block found). We used to create one JournalContext when updating each file's status. In this PR we change that to batching, to avoid too many synchronous flushing. This is consistent with https://github.com/Alluxio/alluxio/pull/16529 It is fine to batch because: 1. It's relatively low impact if we lose entries marking inodes as LOST. Losing a file is HIGH impact, while not identifying a lost file is LOW impact. That identification is asynchronous and can be turned off anyways. 2. Even if an inode is marked LOST, we don't have any special handling... The client will just try to read it the same way and results in IOException. That means the LOST status is not important (unless later we really give it some attention). ### Why are the changes needed? 
Performance improvements ### Does this PR introduce any user facing changes? Users shouldn't perceive this. pr-link: Alluxio/alluxio#16971 change-id: cid-766a33e80bfd76df769d8261642aa1c7b18f99ee --- .../alluxio/master/file/LostFileDetector.java | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java index 3eadbc4a5c25..535bec900ec9 100644 --- a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java +++ b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java @@ -61,7 +61,7 @@ public LostFileDetector(FileSystemMaster fileSystemMaster, BlockMaster blockMast @Override public void heartbeat() throws InterruptedException { Iterator iter = mBlockMaster.getLostBlocksIterator(); - Set markedFiles = new HashSet<>(); + Set toMarkFiles = new HashSet<>(); while (iter.hasNext()) { if (Thread.interrupted()) { throw new InterruptedException("LostFileDetector interrupted."); @@ -69,48 +69,52 @@ public void heartbeat() throws InterruptedException { long blockId = iter.next(); long containerId = BlockId.getContainerId(blockId); long fileId = IdUtils.createFileId(containerId); - if (markedFiles.contains(fileId)) { + if (toMarkFiles.contains(fileId)) { iter.remove(); continue; } - boolean markAsLost = false; try ( LockedInodePath inodePath = mInodeTree.lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE) ) { Inode inode = inodePath.getInode(); if (inode.getPersistenceState() != PersistenceState.PERSISTED) { - markAsLost = true; + toMarkFiles.add(fileId); } + iter.remove(); } catch (FileDoesNotExistException e) { LOG.debug("Exception trying to get inode from inode tree", e); iter.remove(); continue; } + } - if (markAsLost) { - // update the state - try (JournalContext journalContext = mFileSystemMaster.createJournalContext(); - LockedInodePath 
inodePath = - mInodeTree.lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { - Inode inode = inodePath.getInode(); - if (inode.getPersistenceState() != PersistenceState.PERSISTED) { - mInodeTree.updateInode(journalContext, - UpdateInodeEntry.newBuilder().setId(inode.getId()) - .setPersistenceState(PersistenceState.LOST.name()).build()); - markedFiles.add(fileId); + if (toMarkFiles.size() > 0) { + // Here the candidate block has been removed from the checklist + // But the journal entries have not yet been flushed + // If the journal entries are lost, we will never be able to mark them again, + // because the worker will never report those removedBlocks to the master again + // This is fine because the LOST status is purely for display now + try (JournalContext journalContext = mFileSystemMaster.createJournalContext()) { + // update the state on the 2nd pass + for (long fileId : toMarkFiles) { + try (LockedInodePath inodePath = mInodeTree.lockFullInodePath( + fileId, LockPattern.WRITE_INODE, journalContext)) { + Inode inode = inodePath.getInode(); + if (inode.getPersistenceState() != PersistenceState.PERSISTED) { + mInodeTree.updateInode(journalContext, + UpdateInodeEntry.newBuilder().setId(inode.getId()) + .setPersistenceState(PersistenceState.LOST.name()).build()); + toMarkFiles.add(fileId); + } + } catch (FileDoesNotExistException e) { + LOG.debug("Failed to mark file {} as lost. The inode does not exist anymore.", + fileId, e); } - iter.remove(); - } catch (FileDoesNotExistException e) { - LOG.debug("Failed to mark file {} as lost. The inode does not exist anymore.", - fileId, e); - iter.remove(); - } catch (UnavailableException e) { - LOG.warn("Failed to mark files LOST because the journal is not available. " - + "{} files are affected: {}", - markedFiles.size(), markedFiles, e); - break; } + } catch (UnavailableException e) { + LOG.error("Failed to mark files LOST because the journal is not available. 
" + + "{} files are affected: {}", toMarkFiles.size(), toMarkFiles, e); } } } From cb20ccddf8c9442969ae98de7e6737e6bafa650a Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Thu, 9 Mar 2023 22:13:45 +0800 Subject: [PATCH 183/334] Merge JournalContext for persistence ### What changes are proposed in this pull request? `PersistenceChecker` and `PersistenceScheduler` will regularly check all files that need persistence. Some fields in the inode is used to track the persistence status. So PersistenceChecker and PersistentSchedule will constantly update inodes, which creates journal entries. Currently one JournalContext is created on processing each file, and closed after this file is processed. This means the journal entries are flushed once per file. In this change, we merge those flushes to: 1. Reduce frequent JournalContext usage, which acquires the state lock 2. Reduce the number of synchronous journal flushes The idea is also consistent with https://github.com/Alluxio/alluxio/pull/16529 The correctness is guaranteed by: 1. It is fine to lose those journal entries on async persist on failover. If an inode status is not PERSISTED, the new primary will attempt to persist it anyways. ### Why are the changes needed? Mentioned above ### Does this PR introduce any user facing changes? Users should not perceive this change. 
pr-link: Alluxio/alluxio#16966 change-id: cid-cdb5750bb222a3a84fa219e5f9cda2045a34e137 --- .../master/file/DefaultFileSystemMaster.java | 166 ++++++++++-------- 1 file changed, 94 insertions(+), 72 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 0fb78c18e7c6..a5157cb44b4a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -220,6 +220,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -4410,10 +4411,10 @@ public void close() {} // Nothing to clean up * * @param fileId the file ID */ - private void handleExpired(long fileId) throws AlluxioException, UnavailableException { - try (JournalContext journalContext = createJournalContext(); - LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { + private void handleExpired(long fileId, JournalContext journalContext, + AtomicInteger journalCount) throws AlluxioException { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { InodeFile inode = inodePath.getInodeFile(); switch (inode.getPersistenceState()) { case LOST: @@ -4434,6 +4435,7 @@ private void handleExpired(long fileId) throws AlluxioException, UnavailableExce .setPersistJobId(Constants.PERSISTENCE_INVALID_JOB_ID) .setTempUfsPath(Constants.PERSISTENCE_INVALID_UFS_PATH) .build()); + journalCount.addAndGet(2); break; default: throw new IllegalStateException( @@ -4447,7 +4449,8 @@ private void handleExpired(long fileId) 
throws AlluxioException, UnavailableExce * * @param fileId the file ID */ - private void handleReady(long fileId) throws AlluxioException, IOException { + private void handleReady(long fileId, JournalContext journalContext, AtomicInteger journalCount) + throws AlluxioException, IOException { alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); // Lookup relevant file information. AlluxioURI uri; @@ -4513,15 +4516,15 @@ private void handleReady(long fileId) throws AlluxioException, IOException { mPersistJobs.put(fileId, new PersistJob(jobId, fileId, uri, tempUfsPath, timer)); // Update the inode and journal the change. - try (JournalContext journalContext = createJournalContext(); - LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { InodeFile inode = inodePath.getInodeFile(); mInodeTree.updateInodeFile(journalContext, UpdateInodeFileEntry.newBuilder() .setId(inode.getId()) .setPersistJobId(jobId) .setTempUfsPath(tempUfsPath) .build()); + journalCount.incrementAndGet(); } } @@ -4538,76 +4541,92 @@ private void handleReady(long fileId) throws AlluxioException, IOException { public void heartbeat() throws InterruptedException { LOG.debug("Async Persist heartbeat start"); java.util.concurrent.TimeUnit.SECONDS.sleep(mQuietPeriodSeconds); - // Process persist requests. - for (long fileId : mPersistRequests.keySet()) { - // Throw if interrupted. - if (Thread.interrupted()) { - throw new InterruptedException("PersistenceScheduler interrupted."); - } - boolean remove = true; - alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); - if (timer == null) { - // This could occur if a key is removed from mPersistRequests while we are iterating. 
- continue; - } - alluxio.time.ExponentialTimer.Result timerResult = timer.tick(); - if (timerResult == alluxio.time.ExponentialTimer.Result.NOT_READY) { - // operation is not ready to be scheduled - continue; - } - AlluxioURI uri = null; - try { - try (LockedInodePath inodePath = mInodeTree - .lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE)) { - uri = inodePath.getUri(); - } catch (FileDoesNotExistException e) { - LOG.debug("The file (id={}) to be persisted was not found. Likely this file has been " - + "removed by users", fileId, e); - continue; + AtomicInteger journalCounter = new AtomicInteger(0); + try (JournalContext journalContext = createJournalContext()) { + // Process persist requests. + for (long fileId : mPersistRequests.keySet()) { + if (journalCounter.get() > 100) { + // The only exception thrown from flush() will be UnavailableException + // See catch (UnavailableException e) + journalContext.flush(); + journalCounter.set(0); } - try { - checkUfsMode(uri, OperationType.WRITE); - } catch (Exception e) { - LOG.warn("Unable to schedule persist request for path {}: {}", uri, e.toString()); - // Retry when ufs mode permits operation - remove = false; + // Throw if interrupted. + if (Thread.interrupted()) { + throw new InterruptedException("PersistenceScheduler interrupted."); + } + boolean remove = true; + alluxio.time.ExponentialTimer timer = mPersistRequests.get(fileId); + if (timer == null) { + // This could occur if a key is removed from mPersistRequests while we are iterating. 
continue; } - switch (timerResult) { - case EXPIRED: - handleExpired(fileId); - break; - case READY: - handleReady(fileId); - break; - default: - throw new IllegalStateException("Unrecognized timer state: " + timerResult); + alluxio.time.ExponentialTimer.Result timerResult = timer.tick(); + if (timerResult == alluxio.time.ExponentialTimer.Result.NOT_READY) { + // operation is not ready to be scheduled + continue; } - } catch (FileDoesNotExistException | InvalidPathException e) { - LOG.warn("The file {} (id={}) to be persisted was not found : {}", uri, fileId, - e.toString()); - LOG.debug("Exception: ", e); - } catch (UnavailableException e) { - LOG.warn("Failed to persist file {}, will retry later: {}", uri, e.toString()); - remove = false; - } catch (ResourceExhaustedException e) { - LOG.warn("The job service is busy, will retry later: {}", e.toString()); - LOG.debug("Exception: ", e); - mQuietPeriodSeconds = (mQuietPeriodSeconds == 0) ? 1 : - Math.min(MAX_QUIET_PERIOD_SECONDS, mQuietPeriodSeconds * 2); - remove = false; - // End the method here until the next heartbeat. No more jobs should be scheduled during - // the current heartbeat if the job master is at full capacity. - return; - } catch (Exception e) { - LOG.warn("Unexpected exception encountered when scheduling the persist job for file {} " - + "(id={}) : {}", uri, fileId, e.toString()); - LOG.debug("Exception: ", e); - } finally { - if (remove) { - mPersistRequests.remove(fileId); + AlluxioURI uri = null; + try { + try (LockedInodePath inodePath = mInodeTree + .lockFullInodePath(fileId, LockPattern.READ, NoopJournalContext.INSTANCE)) { + uri = inodePath.getUri(); + } catch (FileDoesNotExistException e) { + LOG.debug("The file (id={}) to be persisted was not found. 
Likely this file has been " + + "removed by users", fileId, e); + continue; + } + try { + checkUfsMode(uri, OperationType.WRITE); + } catch (Exception e) { + LOG.warn("Unable to schedule persist request for path {}: {}", uri, e.toString()); + // Retry when ufs mode permits operation + remove = false; + continue; + } + switch (timerResult) { + case EXPIRED: + handleExpired(fileId, journalContext, journalCounter); + break; + case READY: + handleReady(fileId, journalContext, journalCounter); + break; + default: + throw new IllegalStateException("Unrecognized timer state: " + timerResult); + } + } catch (FileDoesNotExistException | InvalidPathException e) { + LOG.warn("The file {} (id={}) to be persisted was not found : {}", uri, fileId, + e.toString()); + LOG.debug("Exception: ", e); + } catch (ResourceExhaustedException e) { + LOG.warn("The job service is busy, will retry later: {}", e.toString()); + LOG.debug("Exception: ", e); + mQuietPeriodSeconds = (mQuietPeriodSeconds == 0) ? 1 : + Math.min(MAX_QUIET_PERIOD_SECONDS, mQuietPeriodSeconds * 2); + remove = false; + // End the method here until the next heartbeat. No more jobs should be scheduled during + // the current heartbeat if the job master is at full capacity. + return; + } catch (Exception e) { + LOG.warn("Unexpected exception encountered when scheduling the persist job for file {} " + + "(id={}) : {}", uri, fileId, e.toString()); + LOG.debug("Exception: ", e); + } finally { + if (remove) { + mPersistRequests.remove(fileId); + } } } + } catch (UnavailableException e) { + // Two ways to arrive here: + // 1. createJournalContext() fails, the batch processing has not started yet + // 2. 
flush() fails and the queue is dirty, the JournalContext will be closed and flushed, + // but the flush will not succeed + // The context is MasterJournalContext, so an UnavailableException indicates either + // the primary failed over, or journal is closed + // In either case, it is fine to close JournalContext and throw away the journal entries + // The next primary will process all TO_BE_PERSISTED files and create new persist jobs + LOG.error("Journal is not running, cannot persist files"); } } } @@ -4638,6 +4657,9 @@ private void handleSuccess(PersistJob job) { String tempUfsPath = job.getTempUfsPath(); List blockIds = new ArrayList<>(); UfsManager.UfsClient ufsClient = null; + // This journal flush is per job and cannot be batched easily, + // because each execution is in a separate thread and this thread doesn't wait for those + // to complete try (JournalContext journalContext = createJournalContext(); LockedInodePath inodePath = mInodeTree .lockFullInodePath(fileId, LockPattern.WRITE_INODE, journalContext)) { From 8edf508ebd49238f55f64011dcdd1ca40731da3f Mon Sep 17 00:00:00 2001 From: jja725 Date: Thu, 9 Mar 2023 18:13:42 -0800 Subject: [PATCH 184/334] Refactor LoadManager to Scheduler ### What changes are proposed in this pull request? Refactoring LoadManager to Scheduler. Redefine the job progress metrics since total file count is hard to be accurate when we have retry and verification ### Why are the changes needed? for other jobs ### Does this PR introduce any user facing changes? updated API in FileSystem ``` /** * Submit a job. * * @param jobRequest the job request * @return job id if job is submitted, empty if a load of the same path already exists */ Optional submitJob(JobRequest jobRequest); /** * Stop a job. * * @param jobDescription the job description * @return true if job is stopped, false if cannot find job */ boolean stopJob(String jobDescription); /** * Get progress of a job. 
* * @param jobDescription the job description * @param format progress report format * @param verbose whether to return verbose report * @return the job progress */ String getLoadProgress(String jobDescription, JobProgressReportFormat format, boolean verbose); ``` pr-link: Alluxio/alluxio#16982 change-id: cid-611019296583cfb8a92fd425f43836771f156485 --- .../alluxio/client/file/BaseFileSystem.java | 19 +- .../client/file/DelegatingFileSystem.java | 19 +- .../java/alluxio/client/file/FileSystem.java | 39 +- .../client/file/FileSystemMasterClient.java | 39 +- .../RetryHandlingFileSystemMasterClient.java | 77 ++- .../client/file/ufs/UfsBaseFileSystem.java | 14 +- .../file/MockFileSystemMasterClient.java | 15 +- .../cache/LocalCacheFileInStreamTest.java | 13 +- .../main/java/alluxio/job/JobDescription.java | 123 ++++ .../src/main/java/alluxio/job/JobRequest.java | 31 + .../main/java/alluxio/job/LoadJobRequest.java | 69 +++ .../journal/checkpoint/CheckpointName.java | 2 +- .../master/file/DefaultFileSystemMaster.java | 26 +- .../FileSystemMasterClientServiceHandler.java | 79 ++- .../master/file/loadmanager/LoadManager.java | 572 ------------------ .../java/alluxio/master/job/FileIterable.java | 199 ++++++ .../master/job/JobFactoryProducer.java | 53 ++ .../master/job/JournalLoadJobFactory.java | 59 ++ .../{file/loadmanager => job}/LoadJob.java | 468 +++++++------- .../alluxio/master/job/LoadJobFactory.java | 65 ++ .../scheduler/DefaultWorkerProvider.java | 65 ++ .../scheduler/JournaledJobMetaStore.java | 89 +++ .../alluxio/master/scheduler/Scheduler.java | 402 ++++++++++++ .../file/scheduler/FileIterableTest.java | 49 ++ .../LoadJobTest.java | 89 +-- .../LoadTestUtils.java | 5 +- .../SchedulerTest.java} | 429 ++++++++----- .../main/proto/grpc/file_system_master.proto | 52 +- core/transport/src/main/proto/proto.lock | 90 +-- .../fuse/auth/AbstractAuthPolicyTest.java | 13 +- .../cli/MockFuseFileSystemMasterClient.java | 15 +- job/common/pom.xml | 6 + 
.../main/java/alluxio/scheduler/job/Job.java | 133 ++++ .../alluxio/scheduler/job/JobFactory.java | 22 + .../alluxio/scheduler/job/JobMetaStore.java | 31 + .../java/alluxio/scheduler/job/JobState.java | 24 +- .../main/java/alluxio/scheduler/job/Task.java | 46 ++ .../alluxio/scheduler/job/WorkerProvider.java | 43 ++ .../fsmaster/FileSystemMasterBase.java | 2 +- .../alluxio/cli/fs/command/LoadCommand.java | 40 +- .../command/LoadCommandIntegrationTest.java | 4 +- 41 files changed, 2389 insertions(+), 1241 deletions(-) create mode 100644 core/common/src/main/java/alluxio/job/JobDescription.java create mode 100644 core/common/src/main/java/alluxio/job/JobRequest.java create mode 100644 core/common/src/main/java/alluxio/job/LoadJobRequest.java delete mode 100644 core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java create mode 100644 core/server/master/src/main/java/alluxio/master/job/FileIterable.java create mode 100644 core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java create mode 100644 core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java rename core/server/master/src/main/java/alluxio/master/{file/loadmanager => job}/LoadJob.java (63%) create mode 100644 core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java create mode 100644 core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java create mode 100644 core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java create mode 100644 core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java rename core/server/master/src/test/java/alluxio/master/file/{loadmanager => scheduler}/LoadJobTest.java (62%) rename core/server/master/src/test/java/alluxio/master/file/{loadmanager => scheduler}/LoadTestUtils.java (97%) rename 
core/server/master/src/test/java/alluxio/master/file/{loadmanager/LoadManagerTest.java => scheduler/SchedulerTest.java} (52%) create mode 100644 job/common/src/main/java/alluxio/scheduler/job/Job.java create mode 100644 job/common/src/main/java/alluxio/scheduler/job/JobFactory.java create mode 100644 job/common/src/main/java/alluxio/scheduler/job/JobMetaStore.java rename core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java => job/common/src/main/java/alluxio/scheduler/job/JobState.java (77%) create mode 100644 job/common/src/main/java/alluxio/scheduler/job/Task.java create mode 100644 job/common/src/main/java/alluxio/scheduler/job/WorkerProvider.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index e9c707b11ad6..dcca12711155 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -45,10 +45,10 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPType; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -57,6 +57,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterInquireClient; import alluxio.resource.CloseableResource; import alluxio.security.authorization.AclEntry; @@ -523,28 +525,27 @@ public void needsSync(AlluxioURI path) } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean 
usePartialListing, boolean verify) { + public Optional submitJob(JobRequest jobRequest) { try (CloseableResource client = mFsContext.acquireMasterClientResource()) { - return client.get().submitLoad(path, bandwidth, usePartialListing, verify); + return client.get().submitJob(jobRequest); } } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { try (CloseableResource client = mFsContext.acquireMasterClientResource()) { - return client.get().stopLoad(path); + return client.get().stopJob(jobDescription); } } @Override - public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { try (CloseableResource client = mFsContext.acquireMasterClientResource()) { - return client.get().getLoadProgress(path, format, verbose); + return client.get().getJobProgress(jobDescription, format, verbose); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index abcdd8ca5fc7..19f77a4d0849 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -27,9 +27,9 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -38,6 +38,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import 
alluxio.security.authorization.AclEntry; import alluxio.wire.BlockLocationInfo; import alluxio.wire.MountPointInfo; @@ -244,20 +246,19 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { - return mDelegatedFileSystem.submitLoad(path, bandwidth, usePartialListing, verify); + public Optional submitJob(JobRequest jobRequest) { + return mDelegatedFileSystem.submitJob(jobRequest); } @Override - public boolean stopLoad(AlluxioURI path) { - return mDelegatedFileSystem.stopLoad(path); + public boolean stopJob(JobDescription jobDescription) { + return mDelegatedFileSystem.stopJob(jobDescription); } @Override - public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { - return mDelegatedFileSystem.getLoadProgress(path, format, verbose); + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + return mDelegatedFileSystem.getJobProgress(jobDescription, format, verbose); } @Override diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index e57e7bc702e2..dccc68cde6d2 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -37,11 +37,11 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.LoadMetadataPOptions; import alluxio.grpc.LoadMetadataPType; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -50,6 +50,8 @@ import 
alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.security.user.UserState; import alluxio.util.CommonUtils; @@ -742,30 +744,29 @@ default void unmount(AlluxioURI path) throws IOException, AlluxioException { void needsSync(AlluxioURI path) throws IOException, AlluxioException; /** - * Submit a load job. - * @param path alluxio path to be loaded - * @param bandwidth bandwidth allocated to this load, unlimited if empty - * @param usePartialListing whether to use partial listing - * @param verify whether to verify after load finishes - * @return true if job is submitted, false if a load of the same path already exists + * Submit a job to scheduler. + * + * @param jobRequest the job request + * @return job id if job is submitted, empty if a job with description already exists */ - boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify); + Optional submitJob(JobRequest jobRequest); /** - * Stop a load job. - * @param path alluxio path to be stopped - * @return true if job is stopped, false if cannot find job + * Stop a job in scheduler. + * + * @param jobDescription the job description + * @return true if job is stopped, false if we cannot find job */ - boolean stopLoad(AlluxioURI path); + boolean stopJob(JobDescription jobDescription); /** - * Get progress of a load job. - * @param path alluxio path to get progress - * @param format progress report format + * Get progress of a job. 
+ * + * @param jobDescription the job description + * @param format progress report format * @param verbose whether to return verbose report - * @return the load job progress + * @return the job progress */ - String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose); + String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java index dff5d49cfe61..02943a854c82 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java @@ -25,6 +25,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -34,6 +35,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UpdateUfsModePOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterClientContext; import alluxio.security.authorization.AclEntry; import alluxio.wire.MountPointInfo; @@ -41,6 +44,7 @@ import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -344,30 +348,29 @@ void updateUfsMode(AlluxioURI ufsUri, UpdateUfsModePOptions options) void needsSync(AlluxioURI path) throws AlluxioStatusException; /** - * Submit a directory load job. 
- * @param path alluxio path to be loaded - * @param bandwidth bandwidth allocated to this load, unlimited if empty - * @param usePartialListing whether to use partial listing - * @param verify whether to verify after load finishes - * @return true if job is submitted, false if a load of the same path already exists + * Submit a job to scheduler. + * + * @param job the job request to submit + * @return jobId if job is submitted, empty if a job already exists */ - boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify); + Optional submitJob(JobRequest job); /** - * Stop a directory load. - * @param path alluxio path to be stopped - * @return true if job is stopped, false if cannot find job + * Stop a job. + * + * @param jobDescription job description be stopped + * @return true if job is stopped, false if we cannot find job */ - boolean stopLoad(AlluxioURI path); + boolean stopJob(JobDescription jobDescription); /** - * Get progress of a load job. - * @param path alluxio path to get progress - * @param format progress report format + * Get progress of a job. 
+ * + * @param jobDescription job description to get progress + * @param format progress report format * @param verbose whether to return verbose report - * @return the load job progress + * @return the job progress */ - String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose); + String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose); } diff --git a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java index 64f10ed3d40c..0673cde84952 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java @@ -33,6 +33,8 @@ import alluxio.grpc.FreePOptions; import alluxio.grpc.FreePRequest; import alluxio.grpc.GetFilePathPRequest; +import alluxio.grpc.GetJobProgressPRequest; +import alluxio.grpc.GetJobProgressPResponse; import alluxio.grpc.GetMountTablePRequest; import alluxio.grpc.GetNewBlockIdForFilePOptions; import alluxio.grpc.GetNewBlockIdForFilePRequest; @@ -42,6 +44,8 @@ import alluxio.grpc.GetStatusPRequest; import alluxio.grpc.GetSyncPathListPRequest; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.JobProgressPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPRequest; import alluxio.grpc.ListStatusPartialPOptions; @@ -61,18 +65,26 @@ import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.SetAttributePRequest; import alluxio.grpc.StartSyncPRequest; +import alluxio.grpc.StopJobPRequest; +import alluxio.grpc.StopJobPResponse; import alluxio.grpc.StopSyncPRequest; +import alluxio.grpc.SubmitJobPRequest; +import alluxio.grpc.SubmitJobPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.grpc.UnmountPRequest; import 
alluxio.grpc.UpdateMountPRequest; import alluxio.grpc.UpdateUfsModePOptions; import alluxio.grpc.UpdateUfsModePRequest; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.master.MasterClientContext; import alluxio.retry.CountingRetry; import alluxio.security.authorization.AclEntry; import alluxio.util.FileSystemOptionsUtils; import alluxio.wire.SyncPointInfo; +import com.google.protobuf.ByteString; +import org.apache.commons.lang3.SerializationUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,6 +92,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Consumer; @@ -419,43 +432,45 @@ public void needsSync(AlluxioURI path) throws AlluxioStatusException { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { - alluxio.grpc.LoadPathPOptions.Builder options = alluxio.grpc.LoadPathPOptions - .newBuilder().setPartialListing(usePartialListing).setVerify(verify); - if (bandwidth.isPresent()) { - options.setBandwidth(bandwidth.getAsLong()); - } + public Optional submitJob(JobRequest job) { connectWithRuntimeException(); - alluxio.grpc.LoadPathPResponse response = mClient.loadPath( - alluxio.grpc.LoadPathPRequest.newBuilder() - .setPath(path.getPath()) - .setOptions(options.build()) - .build()); - return response.getNewLoadSubmitted(); + final ByteString requestBody = ByteString.copyFrom(SerializationUtils.serialize(job)); + SubmitJobPRequest request = SubmitJobPRequest + .newBuilder() + .setRequestBody(requestBody) + .build(); + SubmitJobPResponse response = mClient.submitJob(request); + return response.hasJobId() ? 
Optional.of(response.getJobId()) : Optional.empty(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { connectWithRuntimeException(); - alluxio.grpc.StopLoadPathPResponse response = mClient.stopLoadPath( - alluxio.grpc.StopLoadPathPRequest.newBuilder() - .setPath(path.getPath()) - .build()); - return response.getExistingLoadStopped(); - } - - @Override - public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { - alluxio.grpc.LoadProgressPOptions.Builder options = - alluxio.grpc.LoadProgressPOptions.newBuilder() - .setVerbose(verbose); - format.map(options::setFormat); + StopJobPResponse response = mClient.stopJob(StopJobPRequest + .newBuilder() + .setJobDescription(alluxio.grpc.JobDescription + .newBuilder() + .setType(jobDescription.getType()) + .setPath(jobDescription.getPath()) + .build()) + .build()); + return response.getJobStopped(); + } + + @Override + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { + JobProgressPOptions.Builder options = JobProgressPOptions.newBuilder() + .setVerbose(verbose) + .setFormat(format); connectWithRuntimeException(); - alluxio.grpc.GetLoadProgressPResponse response = mClient.getLoadProgress( - alluxio.grpc.GetLoadProgressPRequest.newBuilder() - .setPath(path.getPath()) + GetJobProgressPResponse response = mClient.getJobProgress( + GetJobProgressPRequest.newBuilder() + .setJobDescription(alluxio.grpc.JobDescription + .newBuilder() + .setType(jobDescription.getType()) + .setPath(jobDescription.getPath()) + .build()) .setOptions(options.build()) .build()); return response.getProgressReport(); diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 5130ea713517..3d8f1befbcfc 100644 --- 
a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -30,9 +30,9 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -41,6 +41,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.resource.CloseableResource; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.Mode; @@ -73,7 +75,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.OptionalLong; import java.util.function.Consumer; import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; @@ -396,19 +397,18 @@ public void needsSync(AlluxioURI path) throws IOException, AlluxioException { } @Override - public boolean submitLoad(AlluxioURI path, OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { + public Optional submitJob(JobRequest jobRequest) { throw new UnsupportedOperationException(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { throw new UnsupportedOperationException(); } @Override - public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { throw new UnsupportedOperationException(); } diff --git a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java 
b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java index 3272f5b63106..2b5d44680fc6 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java +++ b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java @@ -23,6 +23,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -32,6 +33,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UpdateUfsModePOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.wire.MountPointInfo; import alluxio.wire.SyncPointInfo; @@ -42,6 +45,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -236,19 +240,18 @@ public void needsSync(AlluxioURI path) throws AlluxioStatusException { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { - return false; + public Optional submitJob(JobRequest job) { + return Optional.empty(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { return false; } @Override - public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { return null; } } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index 9119e26384d2..630faf1d4e0e 100644 
--- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -39,9 +39,9 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -50,6 +50,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UnmountPOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.security.authorization.AclEntry; @@ -897,19 +899,18 @@ public void needsSync(AlluxioURI path) { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { + public Optional submitJob(JobRequest jobRequest) { throw new UnsupportedOperationException(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { throw new UnsupportedOperationException(); } @Override - public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { throw new UnsupportedOperationException(); } diff --git a/core/common/src/main/java/alluxio/job/JobDescription.java b/core/common/src/main/java/alluxio/job/JobDescription.java new file mode 100644 index 000000000000..4ab98594a202 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/JobDescription.java @@ -0,0 +1,123 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, 
version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; + +/** + * Job description that used as a key to identify the job in the scheduler. + */ +public class JobDescription { + + private final String mPath; + private final String mType; + + private JobDescription(String type, String path) { + mPath = path; + mType = type; + } + + /** + * @return the path of the job affected + */ + public String getType() { + return mType; + } + + /** + * @return the type of the job + */ + public String getPath() { + return mPath; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + JobDescription that = (JobDescription) o; + return Objects.equal(mPath, that.mPath) && Objects.equal(mType, that.mType); + } + + @Override + public int hashCode() { + return Objects.hashCode(mType, mPath); + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Path", mPath) + .add("Type", mType) + .toString(); + } + + /** + * create a job description from JobDescription proto. 
+ * @param jobDescription JobDescription proto + * @return job description + */ + public static JobDescription from(alluxio.grpc.JobDescription jobDescription) { + return new JobDescription(jobDescription.getType(), jobDescription.getPath()); + } + + /** + * @return the job description builder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder for {@link JobDescription}. + */ + public static class Builder { + private String mPath; + private String mType; + + private Builder() {} + + /** + * set path. + * @param path affected path + * @return builder + */ + public Builder setPath(String path) { + mPath = path; + return this; + } + + /** + * set job type. + * @param type job type + * @return builder + */ + public Builder setType(String type) { + mType = type; + return this; + } + + /** + * build job description. + * @return job description + */ + public JobDescription build() { + return new JobDescription(mType, mPath); + } + } +} diff --git a/core/common/src/main/java/alluxio/job/JobRequest.java b/core/common/src/main/java/alluxio/job/JobRequest.java new file mode 100644 index 000000000000..01d8eaf3861c --- /dev/null +++ b/core/common/src/main/java/alluxio/job/JobRequest.java @@ -0,0 +1,31 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.job; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +import java.io.Serializable; + +/** + * A job request that can be used to create Job. All the subclasses are both Java and JSON + * serializable. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonTypeInfo(use = JsonTypeInfo.Id.CLASS, include = JsonTypeInfo.As.PROPERTY, property = "@type") +public interface JobRequest extends Serializable { + + /** + * @return the type of the job + */ + String getType(); +} diff --git a/core/common/src/main/java/alluxio/job/LoadJobRequest.java b/core/common/src/main/java/alluxio/job/LoadJobRequest.java new file mode 100644 index 000000000000..818d4e75e9d0 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/LoadJobRequest.java @@ -0,0 +1,69 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import alluxio.grpc.LoadJobPOptions; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * The request of loading files. 
+ */ +@ThreadSafe +public class LoadJobRequest implements JobRequest { + private static final String TYPE = "load"; + private static final long serialVersionUID = -4100882786127020489L; + private final String mPath; + private final LoadJobPOptions mOptions; + + /** + * @param path the file path + * @param options load job options + **/ + public LoadJobRequest(@JsonProperty("path") String path, + @JsonProperty("loadJobPOptions") LoadJobPOptions options) { + mPath = Preconditions.checkNotNull(path, "The file path cannot be null"); + mOptions = Preconditions.checkNotNull(options, "The load job options cannot be null"); + } + + /** + * @return the file path + */ + public String getPath() { + return mPath; + } + + /** + * @return job options + */ + public LoadJobPOptions getOptions() { + return mOptions; + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Path", mPath) + .add("Options", mOptions) + .toString(); + } + + @Override + public String getType() { + return TYPE; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java index 9ce742199271..75615919331b 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java @@ -39,5 +39,5 @@ public enum CheckpointName { TABLE_MASTER_TRANSFORM_MANAGER, TO_BE_PERSISTED_FILE_IDS, TTL_BUCKET_LIST, - LOAD_MANAGER, + SCHEDULER, } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index a5157cb44b4a..4bfac61254d1 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java 
@@ -20,6 +20,7 @@ import alluxio.ClientContext; import alluxio.Constants; import alluxio.Server; +import alluxio.client.file.FileSystemContext; import alluxio.client.job.JobMasterClient; import alluxio.client.job.JobMasterClientPool; import alluxio.clock.SystemClock; @@ -121,6 +122,9 @@ import alluxio.master.metastore.InodeStore; import alluxio.master.metastore.ReadOnlyInodeStore; import alluxio.master.metrics.TimeSeriesStore; +import alluxio.master.scheduler.DefaultWorkerProvider; +import alluxio.master.scheduler.JournaledJobMetaStore; +import alluxio.master.scheduler.Scheduler; import alluxio.metrics.Metric; import alluxio.metrics.MetricInfo; import alluxio.metrics.MetricKey; @@ -404,7 +408,7 @@ public class DefaultFileSystemMaster extends CoreMaster /** Used to check pending/running backup from RPCs. */ protected final CallTracker mStateLockCallTracker; - private final alluxio.master.file.loadmanager.LoadManager mLoadManager; + private final Scheduler mScheduler; final Clock mClock; @@ -511,7 +515,9 @@ public Type getType() { mSyncPrefetchExecutor.allowCoreThreadTimeOut(true); mSyncMetadataExecutor.allowCoreThreadTimeOut(true); mActiveSyncMetadataExecutor.allowCoreThreadTimeOut(true); - mLoadManager = new alluxio.master.file.loadmanager.LoadManager(this); + FileSystemContext schedulerFsContext = FileSystemContext.create(); + JournaledJobMetaStore jobMetaStore = new JournaledJobMetaStore(this); + mScheduler = new Scheduler(new DefaultWorkerProvider(this, schedulerFsContext), jobMetaStore); // The mount table should come after the inode tree because restoring the mount table requires // that the inode tree is already restored. 
@@ -522,7 +528,7 @@ public Type getType() { add(mMountTable); add(mUfsManager); add(mSyncManager); - add(mLoadManager); + add(jobMetaStore); } }; mJournaledGroup = new JournaledGroup(journaledComponents, CheckpointName.FILE_SYSTEM_MASTER); @@ -566,7 +572,7 @@ private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { public Map getServices() { Map services = new HashMap<>(); services.put(ServiceType.FILE_SYSTEM_MASTER_CLIENT_SERVICE, new GrpcService(ServerInterceptors - .intercept(new FileSystemMasterClientServiceHandler(this, mLoadManager), + .intercept(new FileSystemMasterClientServiceHandler(this, mScheduler), new ClientContextServerInjector()))); services.put(ServiceType.FILE_SYSTEM_MASTER_JOB_SERVICE, new GrpcService(ServerInterceptors .intercept(new FileSystemMasterJobServiceHandler(this), @@ -756,7 +762,7 @@ public void start(Boolean isPrimary) throws IOException { mAccessTimeUpdater.start(); } mSyncManager.start(); - mLoadManager.start(); + mScheduler.start(); } } @@ -771,7 +777,7 @@ public void stop() throws IOException { if (mAccessTimeUpdater != null) { mAccessTimeUpdater.stop(); } - mLoadManager.stop(); + mScheduler.stop(); super.stop(); } @@ -5428,10 +5434,10 @@ public void needsSync(AlluxioURI path) throws InvalidPathException { } /** - * Get load manager. - * @return load manager + * Get scheduler. 
+ * @return scheduler */ - public alluxio.master.file.loadmanager.LoadManager getLoadManager() { - return mLoadManager; + public Scheduler getScheduler() { + return mScheduler; } } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index 71a6768d0c06..3821f04302b4 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -40,6 +40,8 @@ import alluxio.grpc.FreePResponse; import alluxio.grpc.GetFilePathPRequest; import alluxio.grpc.GetFilePathPResponse; +import alluxio.grpc.GetJobProgressPRequest; +import alluxio.grpc.GetJobProgressPResponse; import alluxio.grpc.GetMountTablePRequest; import alluxio.grpc.GetMountTablePResponse; import alluxio.grpc.GetNewBlockIdForFilePRequest; @@ -52,6 +54,7 @@ import alluxio.grpc.GetSyncPathListPRequest; import alluxio.grpc.GetSyncPathListPResponse; import alluxio.grpc.GrpcUtils; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPRequest; import alluxio.grpc.ListStatusPResponse; import alluxio.grpc.ListStatusPartialPRequest; @@ -72,14 +75,21 @@ import alluxio.grpc.SetAttributePResponse; import alluxio.grpc.StartSyncPRequest; import alluxio.grpc.StartSyncPResponse; +import alluxio.grpc.StopJobPRequest; +import alluxio.grpc.StopJobPResponse; import alluxio.grpc.StopSyncPRequest; import alluxio.grpc.StopSyncPResponse; +import alluxio.grpc.SubmitJobPRequest; +import alluxio.grpc.SubmitJobPResponse; import alluxio.grpc.UnmountPRequest; import alluxio.grpc.UnmountPResponse; import alluxio.grpc.UpdateMountPRequest; import alluxio.grpc.UpdateMountPResponse; import alluxio.grpc.UpdateUfsModePRequest; import alluxio.grpc.UpdateUfsModePResponse; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; 
+import alluxio.job.util.SerializationUtils; import alluxio.master.file.contexts.CheckAccessContext; import alluxio.master.file.contexts.CheckConsistencyContext; import alluxio.master.file.contexts.CompleteFileContext; @@ -96,7 +106,10 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.job.JobFactoryProducer; +import alluxio.master.scheduler.Scheduler; import alluxio.recorder.Recorder; +import alluxio.scheduler.job.Job; import alluxio.underfs.UfsMode; import alluxio.util.io.PathUtils; import alluxio.wire.MountPointInfo; @@ -123,19 +136,19 @@ public final class FileSystemMasterClientServiceHandler private static final Logger LOG = LoggerFactory.getLogger(FileSystemMasterClientServiceHandler.class); private final FileSystemMaster mFileSystemMaster; - private final alluxio.master.file.loadmanager.LoadManager mLoadManager; + private final Scheduler mScheduler; /** * Creates a new instance of {@link FileSystemMasterClientServiceHandler}. 
* * @param fileSystemMaster the {@link FileSystemMaster} the handler uses internally - * @param loadManager the {@link alluxio.master.file.loadmanager.LoadManager} + * @param scheduler the {@link Scheduler} */ public FileSystemMasterClientServiceHandler(FileSystemMaster fileSystemMaster, - alluxio.master.file.loadmanager.LoadManager loadManager) { + Scheduler scheduler) { Preconditions.checkNotNull(fileSystemMaster, "fileSystemMaster"); mFileSystemMaster = fileSystemMaster; - mLoadManager = Preconditions.checkNotNull(loadManager, "loadManager"); + mScheduler = Preconditions.checkNotNull(scheduler, "scheduler"); } @Override @@ -512,38 +525,44 @@ public void needsSync(NeedsSyncRequest request, } @Override - public void loadPath(alluxio.grpc.LoadPathPRequest request, - StreamObserver responseObserver) { + public void submitJob(SubmitJobPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { - boolean submitted = mLoadManager.submitLoad( - request.getPath(), - request.getOptions().hasBandwidth() - ? 
java.util.OptionalLong.of(request.getOptions().getBandwidth()) - : java.util.OptionalLong.empty(), - request.getOptions().hasPartialListing() && request.getOptions().getPartialListing(), - request.getOptions().hasVerify() && request.getOptions().getVerify()); - return alluxio.grpc.LoadPathPResponse.newBuilder() - .setNewLoadSubmitted(submitted) - .build(); - }, "LoadPath", "request=%s", responseObserver, request); + JobRequest jobRequest; + try { + jobRequest = (JobRequest) SerializationUtils.deserialize(request + .getRequestBody() + .toByteArray()); + } catch (Exception e) { + throw new IllegalArgumentException("fail to parse job request", e); + } + Job job = JobFactoryProducer.create(jobRequest, mFileSystemMaster).create(); + boolean submitted = mScheduler.submitJob(job); + SubmitJobPResponse.Builder builder = SubmitJobPResponse.newBuilder(); + if (submitted) { + builder.setJobId(job.getJobId()); + } + return builder.build(); + }, "submitJob", "request=%s", responseObserver, request); } @Override - public void stopLoadPath(alluxio.grpc.StopLoadPathPRequest request, - StreamObserver responseObserver) { + public void stopJob(StopJobPRequest request, + StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - boolean stopped = mLoadManager.stopLoad(request.getPath()); - return alluxio.grpc.StopLoadPathPResponse.newBuilder() - .setExistingLoadStopped(stopped) + boolean stopped = mScheduler.stopJob(JobDescription.from(request.getJobDescription())); + return alluxio.grpc.StopJobPResponse.newBuilder() + .setJobStopped(stopped) .build(); - }, "stopLoadPath", "request=%s", responseObserver, request); + }, "stopJob", "request=%s", responseObserver, request); } @Override - public void getLoadProgress(alluxio.grpc.GetLoadProgressPRequest request, - StreamObserver responseObserver) { + public void getJobProgress(GetJobProgressPRequest request, + StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - alluxio.grpc.LoadProgressReportFormat format = 
alluxio.grpc.LoadProgressReportFormat.TEXT; + JobProgressReportFormat format = JobProgressReportFormat.TEXT; if (request.hasOptions() && request.getOptions().hasFormat()) { format = request.getOptions().getFormat(); } @@ -551,11 +570,11 @@ public void getLoadProgress(alluxio.grpc.GetLoadProgressPRequest request, if (request.hasOptions() && request.getOptions().hasVerbose()) { verbose = request.getOptions().getVerbose(); } - return alluxio.grpc.GetLoadProgressPResponse.newBuilder() - .setProgressReport(mLoadManager.getLoadProgress( - request.getPath(), format, verbose)) + return GetJobProgressPResponse.newBuilder() + .setProgressReport(mScheduler.getJobProgress( + JobDescription.from(request.getJobDescription()), format, verbose)) .build(); - }, "getLoadProgress", "request=%s", responseObserver, request); + }, "getJobProgress", "request=%s", responseObserver, request); } /** diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java b/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java deleted file mode 100644 index d26a396569da..000000000000 --- a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadManager.java +++ /dev/null @@ -1,572 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.file.loadmanager; - -import static java.lang.String.format; - -import alluxio.AlluxioURI; -import alluxio.Constants; -import alluxio.client.block.stream.BlockWorkerClient; -import alluxio.client.file.FileSystemContext; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.AccessControlException; -import alluxio.exception.FileDoesNotExistException; -import alluxio.exception.InvalidPathException; -import alluxio.exception.runtime.AlluxioRuntimeException; -import alluxio.exception.runtime.InternalRuntimeException; -import alluxio.exception.runtime.NotFoundRuntimeException; -import alluxio.exception.runtime.ResourceExhaustedRuntimeException; -import alluxio.exception.runtime.UnauthenticatedRuntimeException; -import alluxio.exception.runtime.UnavailableRuntimeException; -import alluxio.exception.status.UnavailableException; -import alluxio.grpc.Block; -import alluxio.grpc.BlockStatus; -import alluxio.grpc.LoadProgressReportFormat; -import alluxio.grpc.LoadRequest; -import alluxio.grpc.LoadResponse; -import alluxio.grpc.TaskStatus; -import alluxio.grpc.UfsReadOptions; -import alluxio.master.file.FileSystemMaster; -import alluxio.master.file.contexts.CheckAccessContext; -import alluxio.master.journal.JournalContext; -import alluxio.master.journal.Journaled; -import alluxio.master.journal.checkpoint.CheckpointName; -import alluxio.metrics.MetricKey; -import alluxio.metrics.MetricsSystem; -import alluxio.proto.journal.Job; -import alluxio.proto.journal.Journal; -import alluxio.resource.CloseableIterator; -import alluxio.resource.CloseableResource; -import alluxio.security.User; -import alluxio.security.authentication.AuthenticatedClientUser; -import alluxio.util.ThreadFactoryUtils; -import alluxio.util.ThreadUtils; -import alluxio.wire.WorkerInfo; - -import com.codahale.metrics.Counter; -import com.codahale.metrics.Meter; -import com.google.common.annotations.VisibleForTesting; -import 
com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterators; -import com.google.common.util.concurrent.ListenableFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.OptionalLong; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import javax.annotation.concurrent.ThreadSafe; - -/** - * The Load manager which controls load operations. It's not thread-safe since start and stop - * method is not thread-safe. But we should only have one thread call these two method. - */ -@ThreadSafe -public final class LoadManager implements Journaled { - private static final Logger LOG = LoggerFactory.getLogger(LoadManager.class); - private static final int CAPACITY = 100; - private static final long WORKER_UPDATE_INTERVAL = Configuration.getMs( - PropertyKey.MASTER_WORKER_INFO_CACHE_REFRESH_TIME); - private static final int EXECUTOR_SHUTDOWN_MS = 10 * Constants.SECOND_MS; - private final FileSystemMaster mFileSystemMaster; - private final FileSystemContext mContext; - private final Map mLoadJobs = new ConcurrentHashMap<>(); - private final Map> mRunningTasks = new ConcurrentHashMap<>(); - // initial thread in start method since we would stop and start thread when gainPrimacy - private ScheduledExecutorService mLoadScheduler; - private volatile boolean mRunning = false; - private Map> mActiveWorkers = ImmutableMap.of(); - - /** - * Constructor. 
- * @param fileSystemMaster fileSystemMaster - */ - public LoadManager(FileSystemMaster fileSystemMaster) { - this(fileSystemMaster, FileSystemContext.create()); - } - - /** - * Constructor. - * @param fileSystemMaster fileSystemMaster - * @param context fileSystemContext - */ - @VisibleForTesting - public LoadManager(FileSystemMaster fileSystemMaster, FileSystemContext context) { - mFileSystemMaster = fileSystemMaster; - mContext = context; - } - - /** - * Start load manager. - */ - public void start() { - if (!mRunning) { - mLoadScheduler = Executors.newSingleThreadScheduledExecutor( - ThreadFactoryUtils.build("load-manager-scheduler", false)); - mLoadScheduler.scheduleAtFixedRate(this::updateWorkers, 0, WORKER_UPDATE_INTERVAL, - TimeUnit.MILLISECONDS); - mLoadScheduler.scheduleWithFixedDelay(this::processJobs, 0, 100, TimeUnit.MILLISECONDS); - mLoadScheduler.scheduleWithFixedDelay(this::cleanupStaleJob, 1, 1, TimeUnit.HOURS); - mRunning = true; - } - } - - /** - * Stop load manager. - */ - public void stop() { - if (mRunning) { - mActiveWorkers.values().forEach(CloseableResource::close); - mActiveWorkers = ImmutableMap.of(); - ThreadUtils.shutdownAndAwaitTermination(mLoadScheduler, EXECUTOR_SHUTDOWN_MS); - mRunning = false; - } - } - - /** - * Submit a load job. 
- * @param loadPath alluxio directory path to load into Alluxio - * @param bandwidth bandwidth allocated to this load - * @param usePartialListing whether to use partial listing or not - * @param verificationEnabled whether to run verification step or not - * @return true if the job is new, false if the job has already been submitted - */ - public boolean submitLoad(String loadPath, OptionalLong bandwidth, - boolean usePartialListing, boolean verificationEnabled) { - try { - mFileSystemMaster.checkAccess(new AlluxioURI(loadPath), CheckAccessContext.defaults()); - } catch (FileDoesNotExistException | InvalidPathException e) { - throw new NotFoundRuntimeException(e); - } catch (AccessControlException e) { - throw new UnauthenticatedRuntimeException(e); - } catch (IOException e) { - throw AlluxioRuntimeException.from(e); - } - return submitLoad(new LoadJob( - loadPath, - Optional.ofNullable(AuthenticatedClientUser.getOrNull()).map(User::getName), UUID - .randomUUID().toString(), bandwidth, - usePartialListing, - verificationEnabled)); - } - - /** - * Submit a load job. 
- * @param loadJob the load job - * @return true if the job is new, false if the job has already been submitted - */ - @VisibleForTesting - public boolean submitLoad(LoadJob loadJob) { - LoadJob existingJob = mLoadJobs.get(loadJob.getPath()); - if (existingJob != null && !existingJob.isDone()) { - updateExistingJob(loadJob, existingJob); - return false; - } - - if (mRunningTasks.size() >= CAPACITY) { - throw new ResourceExhaustedRuntimeException( - "Too many load jobs running, please submit later.", true); - } - writeJournal(loadJob); - mLoadJobs.put(loadJob.getPath(), loadJob); - mRunningTasks.put(loadJob, new HashSet<>()); - LOG.debug(format("start job: %s", loadJob)); - return true; - } - - private void updateExistingJob(LoadJob loadJob, LoadJob existingJob) { - existingJob.updateBandwidth(loadJob.getBandwidth()); - existingJob.setVerificationEnabled(loadJob.isVerificationEnabled()); - writeJournal(existingJob); - LOG.debug(format("updated existing job: %s from %s", existingJob, loadJob)); - if (existingJob.getJobState() == LoadJobState.STOPPED) { - existingJob.setJobState(LoadJobState.LOADING); - mRunningTasks.put(existingJob, new HashSet<>()); - } - } - - /** - * Stop a load job. - * @param loadPath alluxio directory path to load into Alluxio - * @return true if the job is stopped, false if the job does not exist or has already finished - */ - public boolean stopLoad(String loadPath) { - LoadJob existingJob = mLoadJobs.get(loadPath); - if (existingJob != null && existingJob.isRunning()) { - existingJob.setJobState(LoadJobState.STOPPED); - writeJournal(existingJob); - // leftover tasks in mLoadTasks would be removed by scheduling thread. - return true; - } - return false; - } - - /** - * Get the load job's progress report. 
- * @param loadPath alluxio directory path of the load job - * @param format progress report format - * @param verbose whether to include details on failed files and failures - * @return the progress report - */ - public String getLoadProgress( - String loadPath, - LoadProgressReportFormat format, - boolean verbose) { - LoadJob job = mLoadJobs.get(loadPath); - if (job == null) { - throw new NotFoundRuntimeException(format("Load for path %s cannot be found.", loadPath)); - } - return job.getProgress(format, verbose); - } - - /** - * Get active workers. - * @return active workers - */ - @VisibleForTesting - public Map> getActiveWorkers() { - return mActiveWorkers; - } - - /** - * Removes all finished jobs outside the retention time. - */ - @VisibleForTesting - public void cleanupStaleJob() { - long current = System.currentTimeMillis(); - mLoadJobs.entrySet().removeIf(job -> !job.getValue().isRunning() - && job.getValue().getEndTime().isPresent() - && job.getValue().getEndTime().getAsLong() <= (current - Configuration.getMs( - PropertyKey.JOB_RETENTION_TIME))); - } - - /** - * Refresh active workers. 
- */ - @VisibleForTesting - public void updateWorkers() { - if (Thread.currentThread().isInterrupted()) { - return; - } - Set workerInfos; - try { - try { - // TODO(jianjian): need api for healthy worker instead - workerInfos = ImmutableSet.copyOf(mFileSystemMaster.getWorkerInfoList()); - } catch (UnavailableException e) { - LOG.warn("Failed to get worker info, using existing worker infos of {} workers", - mActiveWorkers.size()); - return; - } - if (workerInfos.size() == mActiveWorkers.size() - && workerInfos.containsAll(mActiveWorkers.keySet())) { - return; - } - - ImmutableMap.Builder> updatedWorkers = - ImmutableMap.builder(); - for (WorkerInfo workerInfo : workerInfos) { - if (mActiveWorkers.containsKey(workerInfo)) { - updatedWorkers.put(workerInfo, mActiveWorkers.get(workerInfo)); - } - else { - try { - updatedWorkers.put(workerInfo, - mContext.acquireBlockWorkerClient(workerInfo.getAddress())); - } catch (IOException e) { - // skip the worker if we cannot obtain a client - } - } - } - // Close clients connecting to lost workers - for (Map.Entry> entry : - mActiveWorkers.entrySet()) { - WorkerInfo workerInfo = entry.getKey(); - if (!workerInfos.contains(workerInfo)) { - CloseableResource resource = entry.getValue(); - resource.close(); - LOG.debug("Closed BlockWorkerClient to lost worker {}", workerInfo); - } - } - // Build the clients to the current active worker list - mActiveWorkers = updatedWorkers.build(); - } catch (Exception e) { - // Unknown exception. This should not happen, but if it happens we don't want to lose the - // scheduler thread, thus catching it here. Any exception surfaced here should be properly - // handled. - LOG.error("Unexpected exception thrown in updateWorkers.", e); - } - } - - /** - * Get load jobs. 
- * @return load jobs - */ - @VisibleForTesting - public Map getLoadJobs() { - return mLoadJobs; - } - - private void processJobs() { - if (Thread.currentThread().isInterrupted()) { - return; - } - mRunningTasks.forEach(this::processJob); - } - - private void processJob(LoadJob loadJob, Set loadWorkers) { - try { - if (!loadJob.isRunning()) { - try { - writeJournal(loadJob); - } - catch (UnavailableRuntimeException e) { - // This should not happen because the load manager should not be started while master is - // still processing journal entries. However, if it does happen, we don't want to throw - // exception in a task running on scheduler thead. So just ignore it and hopefully later - // retry will work. - LOG.error("error writing to journal when processing job", e); - } - mRunningTasks.remove(loadJob); - return; - } - if (!loadJob.isHealthy()) { - loadJob.failJob(new InternalRuntimeException("Too many block load failed.")); - return; - } - - // If there are new workers, schedule job onto new workers - mActiveWorkers.forEach((workerInfo, workerClient) -> { - if (!loadWorkers.contains(workerInfo) && scheduleBatch(loadJob, workerInfo, loadWorkers, - workerClient, loadJob.getBatchSize())) { - loadWorkers.add(workerInfo); - } - }); - - if (loadWorkers.isEmpty() && loadJob.isCurrentLoadDone()) { - if (loadJob.getCurrentBlockCount() > 0 && loadJob.isVerificationEnabled()) { - loadJob.initiateVerification(); - } - else { - if (loadJob.isHealthy()) { - loadJob.setJobState(LoadJobState.SUCCEEDED); - JOB_LOAD_SUCCESS.inc(); - } - else { - loadJob.failJob(new InternalRuntimeException("Too many block load failed.")); - } - } - } - } catch (Exception e) { - // Unknown exception. This should not happen, but if it happens we don't want to lose the - // scheduler thread, thus catching it here. Any exception surfaced here should be properly - // handled. 
- LOG.error("Unexpected exception thrown in processJob.", e); - loadJob.failJob(new InternalRuntimeException(e)); - } - } - - // Returns false if the whole task fails - private boolean processResponse( - LoadJob load, - LoadRequest request, - ListenableFuture responseFuture) { - try { - long totalBytes = request.getBlocksList().stream() - .map(Block::getLength) - .reduce(Long::sum) - .orElse(0L); - LoadResponse response = responseFuture.get(); - if (response.getStatus() != TaskStatus.SUCCESS) { - LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); - for (BlockStatus status : response.getBlockStatusList()) { - totalBytes -= status.getBlock().getLength(); - if (!load.isHealthy() || !status.getRetryable() || !load.addBlockToRetry( - status.getBlock())) { - load.addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); - } - } - } - load.addLoadedBytes(totalBytes); - JOB_LOAD_BLOCK_COUNT.inc( - request.getBlocksCount() - response.getBlockStatusCount()); - JOB_LOAD_BLOCK_SIZE.inc(totalBytes); - JOB_LOAD_RATE.mark(totalBytes); - return response.getStatus() != TaskStatus.FAILURE; - } - catch (ExecutionException e) { - LOG.warn("exception when trying to get load response.", e.getCause()); - for (Block block : request.getBlocksList()) { - if (load.isHealthy()) { - load.addBlockToRetry(block); - } - else { - AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); - load.addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() - .value()); - } - } - return false; - } - catch (CancellationException e) { - LOG.warn("Task get canceled and will retry.", e); - request.getBlocksList().forEach(load::addBlockToRetry); - return true; - } - catch (InterruptedException e) { - request.getBlocksList().forEach(load::addBlockToRetry); - Thread.currentThread().interrupt(); - // We don't count InterruptedException as task failure - return true; - } - } - - private boolean scheduleBatch( - LoadJob load, 
- WorkerInfo workerInfo, - Set loadWorkers, - CloseableResource workerClient, - int batchSize) { - if (!load.isRunning()) { - return false; - } - List batch; - try { - batch = load.getNextBatch(mFileSystemMaster, batchSize); - } catch (AlluxioRuntimeException e) { - LOG.warn(format("error getting next batch for load %s", load), e); - if (!e.isRetryable()) { - load.failJob(e); - } - return false; - } - - if (batch.isEmpty()) { - return false; - } - - LoadRequest request = buildRequest(batch, load.getUser(), load.getJobId(), load.getBandwidth()); - ListenableFuture responseFuture = workerClient.get().load(request); - responseFuture.addListener(() -> { - try { - if (!processResponse(load, request, responseFuture)) { - loadWorkers.remove(workerInfo); - } - // Schedule next batch for healthy job - if (load.isHealthy()) { - if (mActiveWorkers.containsKey(workerInfo)) { - if (!scheduleBatch(load, workerInfo, loadWorkers, mActiveWorkers.get(workerInfo), - load.getBatchSize())) { - loadWorkers.remove(workerInfo); - } - } - else { - loadWorkers.remove(workerInfo); - } - } - } catch (Exception e) { - // Unknown exception. This should not happen, but if it happens we don't want to lose the - // scheduler thread, thus catching it here. Any exception surfaced here should be properly - // handled. 
- LOG.error("Unexpected exception thrown in response future listener.", e); - load.failJob(new InternalRuntimeException(e)); - } - }, mLoadScheduler); - return true; - } - - private void writeJournal(LoadJob job) { - try (JournalContext context = mFileSystemMaster.createJournalContext()) { - context.append(job.toJournalEntry()); - } catch (UnavailableException e) { - throw new UnavailableRuntimeException( - "There is an ongoing backup running, please submit later", e); - } - } - - private LoadRequest buildRequest(List blockBatch, Optional user, String tag, - OptionalLong bandwidth) { - LoadRequest.Builder request = LoadRequest - .newBuilder() - .addAllBlocks(blockBatch); - UfsReadOptions.Builder options = - UfsReadOptions.newBuilder().setTag(tag).setPositionShort(false); - if (bandwidth.isPresent()) { - options.setBandwidth(bandwidth.getAsLong() / mActiveWorkers.size()); - } - user.ifPresent(options::setUser); - return request.setOptions(options.build()).build(); - } - - @Override - public CloseableIterator getJournalEntryIterator() { - return CloseableIterator.noopCloseable( - Iterators.transform(mLoadJobs.values().iterator(), LoadJob::toJournalEntry)); - } - - @Override - public boolean processJournalEntry(Journal.JournalEntry entry) { - if (!entry.hasLoadJob()) { - return false; - } - Job.LoadJobEntry loadJobEntry = entry.getLoadJob(); - LoadJob job = LoadJob.fromJournalEntry(loadJobEntry); - mLoadJobs.put(loadJobEntry.getLoadPath(), job); - if (job.isDone()) { - mRunningTasks.remove(job); - } - else { - mRunningTasks.put(job, new HashSet<>()); - } - return true; - } - - @Override - public void resetState() - { - mLoadJobs.clear(); - mRunningTasks.clear(); - } - - @Override - public CheckpointName getCheckpointName() - { - return CheckpointName.LOAD_MANAGER; - } - - // metrics - public static final Counter JOB_LOAD_SUCCESS = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); - public static final Counter JOB_LOAD_FAIL = - 
MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); - public static final Counter JOB_LOAD_BLOCK_COUNT = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); - public static final Counter JOB_LOAD_BLOCK_FAIL = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); - public static final Counter JOB_LOAD_BLOCK_SIZE = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); - public static final Meter JOB_LOAD_RATE = - MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); -} diff --git a/core/server/master/src/main/java/alluxio/master/job/FileIterable.java b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java new file mode 100644 index 000000000000..b826c6d2479f --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java @@ -0,0 +1,199 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.job; + +import static java.util.Objects.requireNonNull; + +import alluxio.AlluxioURI; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.CheckAccessContext; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.wire.BlockInfo; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.FileInfo; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Iterable for {@link FileInfo} objects. Generates the list of files from file system master. + */ +public class FileIterable implements Iterable { + private final FileSystemMaster mFileSystemMaster; + private final String mPath; + private final Optional mUser; + private final boolean mUsePartialListing; + private final Predicate mFilter; + + /** + * Creates a new instance of {@link FileIterable}. 
+ * + * @param fileSystemMaster file system master + * @param path path to list + * @param user user to list as + * @param usePartialListing whether to use partial listing + * @param filter filter to apply to the file infos + */ + public FileIterable(FileSystemMaster fileSystemMaster, String path, Optional user, + boolean usePartialListing, Predicate filter) { + mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mUsePartialListing = usePartialListing; + mFilter = filter; + } + + /** + * @return file iterator. generate new iterator each time + */ + public FileIterator iterator() { + return new FileIterator(mFileSystemMaster, mPath, mUser, mUsePartialListing, mFilter); + } + + /** + * An iterator over {@link FileInfo} objects. + */ + public class FileIterator implements Iterator { + private final ListStatusPOptions.Builder mListOptions = ListStatusPOptions + .newBuilder() + .setRecursive(true); + private static final int PARTIAL_LISTING_BATCH_SIZE = 100; + private final FileSystemMaster mFileSystemMaster; + private final String mPath; + private final Optional mUser; + private final boolean mUsePartialListing; + private final Predicate mFilter; + private String mStartAfter = ""; + private List mFiles; + private Iterator mFileInfoIterator; + private final AtomicLong mTotalFileCount = new AtomicLong(); + private final AtomicLong mTotalByteCount = new AtomicLong(); + + /** + * Creates a new instance of {@link FileIterator}. 
+ * + * @param fileSystemMaster file system master + * @param path path to list + * @param user user to list as + * @param usePartialListing whether to use partial listing + * @param filter filter to apply to the file infos + */ + public FileIterator(FileSystemMaster fileSystemMaster, String path, Optional user, + boolean usePartialListing, Predicate filter) { + mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); + mPath = requireNonNull(path, "path is null"); + mUser = requireNonNull(user, "user is null"); + mUsePartialListing = usePartialListing; + mFilter = filter; + checkAccess(); + if (usePartialListing) { + partialListFileInfos(); + } + else { + listFileInfos(ListStatusContext.create(mListOptions)); + } + } + + private void checkAccess() { + AuthenticatedClientUser.set(mUser.orElse(null)); + try { + mFileSystemMaster.checkAccess(new AlluxioURI(mPath), CheckAccessContext.defaults()); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } + } + + @Override + public boolean hasNext() { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.hasNext(); + } + + @Override + public FileInfo next() { + if (mUsePartialListing && !mFileInfoIterator.hasNext()) { + partialListFileInfos(); + } + return mFileInfoIterator.next(); + } + + private void partialListFileInfos() { + if (!mStartAfter.isEmpty()) { + mListOptions.setDisableAreDescendantsLoadedCheck(true); + } + ListStatusContext context = ListStatusContext.create(ListStatusPartialPOptions + .newBuilder() + .setOptions(mListOptions) + .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) + .setStartAfter(mStartAfter)); + listFileInfos(context); + if (mFiles.size() > 0) { + mStartAfter = mFiles + .get(mFiles.size() - 1) + .getPath(); 
+ } + } + + private void listFileInfos(ListStatusContext context) { + try { + AuthenticatedClientUser.set(mUser.orElse(null)); + mFiles = mFileSystemMaster + .listStatus(new AlluxioURI(mPath), context) + .stream() + .filter(mFilter) + .collect(Collectors.toList()); + mFileInfoIterator = mFiles.iterator(); + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new NotFoundRuntimeException(e); + } catch (AccessControlException e) { + throw new UnauthenticatedRuntimeException(e); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } finally { + AuthenticatedClientUser.remove(); + } + mTotalFileCount.set(mFiles.size()); + mTotalByteCount.set(mFiles + .stream() + .map(FileInfo::getFileBlockInfos) + .flatMap(Collection::stream) + .map(FileBlockInfo::getBlockInfo) + .filter(blockInfo -> blockInfo + .getLocations() + .isEmpty()) + .map(BlockInfo::getLength) + .reduce(Long::sum) + .orElse(0L)); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java new file mode 100644 index 000000000000..dc9e50c743ca --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java @@ -0,0 +1,53 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.job; + +import alluxio.job.JobRequest; +import alluxio.job.LoadJobRequest; +import alluxio.master.file.FileSystemMaster; +import alluxio.proto.journal.Journal; +import alluxio.scheduler.job.JobFactory; + +/** + * Producer for {@link JobFactory}. + */ +public class JobFactoryProducer { + private JobFactoryProducer() {} // prevent instantiation + + /** + * @param request the job request + * @param fsMaster the file system master + * @return the job factory + */ + public static JobFactory create(JobRequest request, FileSystemMaster fsMaster) { + switch (request.getType()) { + case "load": + return new LoadJobFactory((LoadJobRequest) request, fsMaster); + default: + throw new IllegalArgumentException("Unknown job type: " + request.getType()); + } + } + + /** + * @param entry the job journal entry + * @param fsMaster the file system master + * @return the job factory + */ + public static JobFactory create(Journal.JournalEntry entry, FileSystemMaster fsMaster) { + if (entry.hasLoadJob()) { + return new JournalLoadJobFactory(entry.getLoadJob(), fsMaster); + } + else { + throw new IllegalArgumentException("Unknown job type: " + entry); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java new file mode 100644 index 000000000000..8a45e13e60df --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/JournalLoadJobFactory.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.master.file.FileSystemMaster; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobFactory; +import alluxio.scheduler.job.JobState; + +import java.util.Optional; +import java.util.OptionalLong; + +/** + * Factory for creating {@link LoadJob}s from journal entries. + */ +public class JournalLoadJobFactory implements JobFactory { + + private final FileSystemMaster mFsMaster; + + private final alluxio.proto.journal.Job.LoadJobEntry mJobEntry; + + /** + * Create factory. + * @param journalEntry journal entry + * @param fsMaster file system master + */ + public JournalLoadJobFactory(alluxio.proto.journal.Job.LoadJobEntry journalEntry, + FileSystemMaster fsMaster) { + mFsMaster = fsMaster; + mJobEntry = journalEntry; + } + + @Override + public Job create() { + Optional user = + mJobEntry.hasUser() ? Optional.of(mJobEntry.getUser()) : Optional.empty(); + FileIterable fileIterator = + new FileIterable(mFsMaster, mJobEntry.getLoadPath(), user, mJobEntry.getPartialListing(), + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = new LoadJob(mJobEntry.getLoadPath(), user, mJobEntry.getJobId(), + mJobEntry.hasBandwidth() ? 
OptionalLong.of(mJobEntry.getBandwidth()) : OptionalLong.empty(), + mJobEntry.getPartialListing(), mJobEntry.getVerify(), fileIterator); + job.setJobState(JobState.fromProto(mJobEntry.getState())); + if (mJobEntry.hasEndTime()) { + job.setEndTime(mJobEntry.getEndTime()); + } + return job; + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java similarity index 63% rename from core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java rename to core/server/master/src/main/java/alluxio/master/job/LoadJob.java index 098c7a23d55b..3aee54e7a9e4 100644 --- a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJob.java +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java @@ -9,35 +9,38 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.master.file.loadmanager; +package alluxio.master.job; +import static java.lang.String.format; import static java.util.Objects.requireNonNull; -import alluxio.AlluxioURI; +import alluxio.client.block.stream.BlockWorkerClient; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.exception.AccessControlException; -import alluxio.exception.FileDoesNotExistException; -import alluxio.exception.InvalidPathException; import alluxio.exception.runtime.AlluxioRuntimeException; import alluxio.exception.runtime.InternalRuntimeException; import alluxio.exception.runtime.InvalidArgumentRuntimeException; -import alluxio.exception.runtime.NotFoundRuntimeException; -import alluxio.exception.runtime.UnauthenticatedRuntimeException; import alluxio.grpc.Block; -import alluxio.grpc.ListStatusPOptions; -import alluxio.grpc.ListStatusPartialPOptions; -import alluxio.grpc.LoadProgressReportFormat; -import alluxio.master.file.FileSystemMaster; -import alluxio.master.file.contexts.ListStatusContext; -import 
alluxio.proto.journal.Job; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.grpc.UfsReadOptions; +import alluxio.job.JobDescription; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; import alluxio.proto.journal.Journal; -import alluxio.security.authentication.AuthenticatedClientUser; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; import alluxio.util.FormatUtils; import alluxio.wire.BlockInfo; -import alluxio.wire.FileBlockInfo; import alluxio.wire.FileInfo; +import alluxio.wire.WorkerInfo; +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.PropertyAccessor; @@ -48,11 +51,10 @@ import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -62,22 +64,29 @@ import java.util.Optional; import java.util.OptionalLong; import java.util.UUID; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; +import java.util.function.Predicate; import javax.annotation.concurrent.NotThreadSafe; /** - * This class should only be manipulated from the scheduler thread in LoadManager + * Load job that loads a file or a directory into Alluxio. 
+ * This class should only be manipulated from the scheduler thread in Scheduler * thus the state changing functions are not thread safe. */ @NotThreadSafe -public class LoadJob { +public class LoadJob implements Job { private static final Logger LOG = LoggerFactory.getLogger(LoadJob.class); + public static final String TYPE = "load"; private static final double FAILURE_RATIO_THRESHOLD = 0.05; private static final int FAILURE_COUNT_THRESHOLD = 100; private static final int RETRY_BLOCK_CAPACITY = 1000; private static final double RETRY_THRESHOLD = 0.8 * RETRY_BLOCK_CAPACITY; private static final int BATCH_SIZE = Configuration.getInt(PropertyKey.JOB_BATCH_SIZE); + public static final Predicate QUALIFIED_FILE_FILTER = + (fileInfo) -> !fileInfo.isFolder() && fileInfo.isCompleted() && fileInfo.isPersisted() + && fileInfo.getInAlluxioPercentage() != 100; // Job configurations private final String mPath; private final Optional mUser; @@ -90,18 +99,17 @@ public class LoadJob { private final Map mFailedFiles = new HashMap<>(); private final long mStartTime; private final AtomicLong mProcessedFileCount = new AtomicLong(); - private final AtomicLong mTotalFileCount = new AtomicLong(); private final AtomicLong mLoadedByteCount = new AtomicLong(); - private final AtomicLong mTotalByteCount = new AtomicLong(); private final AtomicLong mTotalBlockCount = new AtomicLong(); private final AtomicLong mCurrentBlockCount = new AtomicLong(); private final AtomicLong mTotalFailureCount = new AtomicLong(); private final AtomicLong mCurrentFailureCount = new AtomicLong(); private final String mJobId; - private LoadJobState mState; + private JobState mState; private Optional mFailedReason = Optional.empty(); - private Optional mFileIterator = Optional.empty(); + private final Iterable mFileIterable; + private Optional> mFileIterator = Optional.empty(); private FileInfo mCurrentFile; private Iterator mBlockIterator = Collections.emptyIterator(); private OptionalLong mEndTime = 
OptionalLong.empty(); @@ -111,10 +119,13 @@ public class LoadJob { * @param path file path * @param user user for authentication * @param bandwidth bandwidth + * @param fileIterator file iterator */ @VisibleForTesting - public LoadJob(String path, String user, OptionalLong bandwidth) { - this(path, Optional.of(user), UUID.randomUUID().toString(), bandwidth, false, false); + public LoadJob(String path, String user, OptionalLong bandwidth, + FileIterable fileIterator) { + this(path, Optional.of(user), UUID.randomUUID().toString(), bandwidth, false, false, + fileIterator); } /** @@ -126,23 +137,25 @@ public LoadJob(String path, String user, OptionalLong bandwidth) { * @param bandwidth bandwidth * @param usePartialListing whether to use partial listing * @param verificationEnabled whether to verify the job after loaded + * @param fileIterable file iterable */ public LoadJob( String path, Optional user, String jobId, OptionalLong bandwidth, boolean usePartialListing, - boolean verificationEnabled) { + boolean verificationEnabled, FileIterable fileIterable) { mPath = requireNonNull(path, "path is null"); mUser = requireNonNull(user, "user is null"); mJobId = requireNonNull(jobId, "jobId is null"); Preconditions.checkArgument( !bandwidth.isPresent() || bandwidth.getAsLong() > 0, - String.format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); + format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); mBandwidth = bandwidth; mUsePartialListing = usePartialListing; mVerificationEnabled = verificationEnabled; mStartTime = System.currentTimeMillis(); - mState = LoadJobState.LOADING; + mState = JobState.RUNNING; + mFileIterable = fileIterable; } /** @@ -161,10 +174,21 @@ public Optional getUser() { return mUser; } + @Override + public String getJobId() { + return mJobId; + } + + @Override + public JobDescription getDescription() { + return JobDescription.newBuilder().setPath(mPath).setType(TYPE).build(); + } + /** * Get end time. 
* @return end time */ + @Override public OptionalLong getEndTime() { return mEndTime; } @@ -201,6 +225,16 @@ public boolean isVerificationEnabled() { return mVerificationEnabled; } + /** + * Is verification enabled. + * + * @return whether verification is enabled + */ + @Override + public boolean needVerification() { + return mVerificationEnabled && mCurrentBlockCount.get() > 0; + } + /** * Enable verification. * @param enableVerification whether to enable verification @@ -213,7 +247,8 @@ public void setVerificationEnabled(boolean enableVerification) { * Get load status. * @return the load job's status */ - public LoadJobState getJobState() { + @Override + public JobState getJobState() { return mState; } @@ -221,55 +256,40 @@ public LoadJobState getJobState() { * Set load state. * @param state new state */ - public void setJobState(LoadJobState state) { + @Override + public void setJobState(JobState state) { LOG.debug("Change JobState to {} for job {}", state, this); mState = state; if (!isRunning()) { mEndTime = OptionalLong.of(System.currentTimeMillis()); } - } - - /** - * Get uniq tag. - * @return the tag - */ - public String getJobId() { - return mJobId; + if (state == JobState.SUCCEEDED) { + JOB_LOAD_SUCCESS.inc(); + } } /** * Set load state to FAILED with given reason. * @param reason failure exception */ + @Override public void failJob(AlluxioRuntimeException reason) { - setJobState(LoadJobState.FAILED); + setJobState(JobState.FAILED); mFailedReason = Optional.of(reason); - LoadManager.JOB_LOAD_FAIL.inc(); - } - - /** - * Get batch size. - * @return batch size - */ - public int getBatchSize() { - return BATCH_SIZE; + JOB_LOAD_FAIL.inc(); } /** * Add bytes to total loaded bytes. * @param bytes bytes to be added to total */ + @VisibleForTesting public void addLoadedBytes(long bytes) { mLoadedByteCount.addAndGet(bytes); } - /** - * Get load job progress. 
- * @param format report format - * @param verbose whether to include error details in the report - * @return the load progress report - */ - public String getProgress(LoadProgressReportFormat format, boolean verbose) { + @Override + public String getProgress(JobProgressReportFormat format, boolean verbose) { return (new LoadProgressReport(this, verbose)).getReport(format); } @@ -281,14 +301,6 @@ public long getCurrentBlockCount() { return mCurrentBlockCount.get(); } - /** - * Get the total processed block count for this job. - * @return total block count - */ - public long getTotalBlockCount() { - return mTotalBlockCount.get(); - } - @Override public boolean equals(Object o) { if (this == o) { @@ -298,82 +310,84 @@ public boolean equals(Object o) { return false; } LoadJob that = (LoadJob) o; - return Objects.equal(mPath, that.mPath); + return Objects.equal(getDescription(), that.getDescription()); } @Override public int hashCode() { - return Objects.hashCode(mPath); + return Objects.hashCode(getDescription()); } - /** - * Check whether the load job is healthy. - * @return true if the load job is healthy, false if not - */ + @Override public boolean isHealthy() { long currentFailureCount = mCurrentFailureCount.get(); - return mState != LoadJobState.FAILED + return mState != JobState.FAILED && currentFailureCount <= FAILURE_COUNT_THRESHOLD || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; } - /** - * Check whether the load job is still running. - * @return true if the load job is running, false if not - */ + @Override public boolean isRunning() { - return mState == LoadJobState.LOADING || mState == LoadJobState.VERIFYING; + return mState == JobState.RUNNING || mState == JobState.VERIFYING; } - /** - * Check whether the load job is finished. 
- * @return true if the load job is finished, false if not - */ + @Override public boolean isDone() { - return mState == LoadJobState.SUCCEEDED || mState == LoadJobState.FAILED; + return mState == JobState.SUCCEEDED || mState == JobState.FAILED; } - /** - * Check whether the current loading pass is finished. - * @return true if the load job is finished, false if not - */ - public boolean isCurrentLoadDone() { - return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() + @Override + public boolean isCurrentPassDone() { + return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() && mRetryBlocks.isEmpty(); } - /** - * Initiate a verification pass. This will re-list the directory and find - * any unloaded files / blocks and try to load them again. - */ + @Override public void initiateVerification() { - Preconditions.checkState(isCurrentLoadDone(), "Previous pass is not finished"); + Preconditions.checkState(isCurrentPassDone(), "Previous pass is not finished"); mFileIterator = Optional.empty(); mTotalBlockCount.addAndGet(mCurrentBlockCount.get()); mTotalFailureCount.addAndGet(mCurrentFailureCount.get()); mCurrentBlockCount.set(0); mCurrentFailureCount.set(0); - mState = LoadJobState.VERIFYING; + mState = JobState.VERIFYING; + } + + /** + * get next load task. + * + * @param worker blocker to worker + * @return the next task to run. If there is no task to run, return empty + */ + public Optional getNextTask(WorkerInfo worker) { + List blocks = getNextBatchBlocks(BATCH_SIZE); + if (blocks.isEmpty()) { + return Optional.empty(); + } + return Optional.of(new LoadTask(blocks)); } /** * Get next batch of blocks. 
- * @param fileSystemMaster file system master to fetch file infos * @param count number of blocks * @return list of blocks */ - public List getNextBatch(FileSystemMaster fileSystemMaster, int count) { + @VisibleForTesting + public List getNextBatchBlocks(int count) { if (!mFileIterator.isPresent()) { - mFileIterator = - Optional.of(new FileIterator(fileSystemMaster, mPath, mUser, mUsePartialListing)); - if (!mFileIterator.get().hasNext()) { + mFileIterator = Optional.of(mFileIterable.iterator()); + if (!mFileIterator + .get() + .hasNext()) { return ImmutableList.of(); } mCurrentFile = mFileIterator.get().next(); - mProcessedFileCount.incrementAndGet(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); } - ImmutableList.Builder batchBuilder = ImmutableList.builder(); int i = 0; // retry failed blocks if there's too many failed blocks otherwise wait until no more new block @@ -390,7 +404,9 @@ public List getNextBatch(FileSystemMaster fileSystemMaster, int count) { return batchBuilder.build(); } mCurrentFile = mFileIterator.get().next(); - mProcessedFileCount.incrementAndGet(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } mBlockIterator = mCurrentFile.getBlockIds().listIterator(); } long blockId = mBlockIterator.next(); @@ -398,6 +414,8 @@ public List getNextBatch(FileSystemMaster fileSystemMaster, int count) { if (blockInfo.getLocations().isEmpty()) { batchBuilder.add(buildBlock(mCurrentFile, blockId)); mCurrentBlockCount.incrementAndGet(); + // would be inaccurate when we initial verification, and we retry un-retryable blocks + mTotalByteCount.addAndGet(blockInfo.getLength()); } } return batchBuilder.build(); @@ -408,6 +426,7 @@ public List getNextBatch(FileSystemMaster fileSystemMaster, int count) { * @param block the block that failed to load thus needing retry * @return whether the block is 
successfully added */ + @VisibleForTesting public boolean addBlockToRetry(Block block) { if (mRetryBlocks.size() >= RETRY_BLOCK_CAPACITY) { return false; @@ -415,7 +434,7 @@ public boolean addBlockToRetry(Block block) { LOG.debug("Retry block {}", block); mRetryBlocks.add(block); mCurrentFailureCount.incrementAndGet(); - LoadManager.JOB_LOAD_BLOCK_FAIL.inc(); + JOB_LOAD_BLOCK_FAIL.inc(); return true; } @@ -426,14 +445,15 @@ public boolean addBlockToRetry(Block block) { * @param message failure message * @param code status code for exception */ + @VisibleForTesting public void addBlockFailure(Block block, String message, int code) { // When multiple blocks of the same file failed to load, from user's perspective, // it's not hugely important what are the reasons for each specific failure, // if they are different, so we will just keep the first one. mFailedFiles.put(block.getUfsPath(), - String.format("Status code: %s, message: %s", code, message)); + format("Status code: %s, message: %s", code, message)); mCurrentFailureCount.incrementAndGet(); - LoadManager.JOB_LOAD_BLOCK_FAIL.inc(); + JOB_LOAD_BLOCK_FAIL.inc(); } private static Block buildBlock(FileInfo fileInfo, long blockId) { @@ -472,14 +492,12 @@ public String toString() { .toString(); } - /** - * @return journal entry of job - */ + @Override public Journal.JournalEntry toJournalEntry() { - Job.LoadJobEntry.Builder jobEntry = Job.LoadJobEntry + alluxio.proto.journal.Job.LoadJobEntry.Builder jobEntry = alluxio.proto.journal.Job.LoadJobEntry .newBuilder() .setLoadPath(mPath) - .setState(LoadJobState.toProto(mState)) + .setState(JobState.toProto(mState)) .setPartialListing(mUsePartialListing) .setVerify(mVerificationEnabled) .setJobId(mJobId); @@ -492,25 +510,6 @@ public Journal.JournalEntry toJournalEntry() { .build(); } - /** - * Get journal entry of the job. 
- * - * @param loadJobEntry journal entry - * @return journal entry of the job - */ - public static LoadJob fromJournalEntry(Job.LoadJobEntry loadJobEntry) { - LoadJob job = new LoadJob(loadJobEntry.getLoadPath(), - loadJobEntry.hasUser() ? Optional.of(loadJobEntry.getUser()) : Optional.empty(), - loadJobEntry.getJobId(), - loadJobEntry.hasBandwidth() ? OptionalLong.of(loadJobEntry.getBandwidth()) : - OptionalLong.empty(), loadJobEntry.getPartialListing(), loadJobEntry.getVerify()); - job.setJobState(LoadJobState.fromProto(loadJobEntry.getState())); - if (loadJobEntry.hasEndTime()) { - job.setEndTime(loadJobEntry.getEndTime()); - } - return job; - } - /** * Get duration in seconds. * @return job duration in seconds @@ -520,101 +519,114 @@ public long getDurationInSec() { return (mEndTime.orElse(System.currentTimeMillis()) - mStartTime) / 1000; } - private class FileIterator implements Iterator { - private final ListStatusPOptions.Builder mListOptions = - ListStatusPOptions.newBuilder().setRecursive(true); - private static final int PARTIAL_LISTING_BATCH_SIZE = 100; - private final FileSystemMaster mFileSystemMaster; - private final String mPath; - private final Optional mUser; - private final boolean mUsePartialListing; - private String mStartAfter = ""; - private List mFiles; - private Iterator mFileInfoIterator; - - public FileIterator(FileSystemMaster fileSystemMaster, String path, - Optional user, boolean usePartialListing) { - mFileSystemMaster = requireNonNull(fileSystemMaster, "fileSystemMaster is null"); - mPath = requireNonNull(path, "path is null"); - mUser = requireNonNull(user, "user is null"); - mUsePartialListing = usePartialListing; - if (usePartialListing) { - partialListFileInfos(); - } else { - listFileInfos(ListStatusContext.create(mListOptions)); + @Override + public boolean processResponse(LoadTask loadTask) { + try { + long totalBytes = loadTask.getBlocks().stream() + .map(Block::getLength) + .reduce(Long::sum) + .orElse(0L); + LoadResponse 
response = loadTask.getResponseFuture().get(); + if (response.getStatus() != TaskStatus.SUCCESS) { + LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); + for (BlockStatus status : response.getBlockStatusList()) { + totalBytes -= status.getBlock().getLength(); + if (!isHealthy() || !status.getRetryable() || !addBlockToRetry( + status.getBlock())) { + addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); + } + } } + addLoadedBytes(totalBytes); + JOB_LOAD_BLOCK_COUNT.inc( + loadTask.getBlocks().size() - response.getBlockStatusCount()); + JOB_LOAD_BLOCK_SIZE.inc(totalBytes); + JOB_LOAD_RATE.mark(totalBytes); + return response.getStatus() != TaskStatus.FAILURE; } - - @Override - public boolean hasNext() - { - if (mUsePartialListing && !mFileInfoIterator.hasNext()) { - partialListFileInfos(); + catch (ExecutionException e) { + LOG.warn("exception when trying to get load response.", e.getCause()); + for (Block block : loadTask.getBlocks()) { + if (isHealthy()) { + addBlockToRetry(block); + } + else { + AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); + addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() + .value()); + } } - return mFileInfoIterator.hasNext(); + return false; + } + catch (CancellationException e) { + LOG.warn("Task get canceled and will retry.", e); + loadTask.getBlocks().forEach(this::addBlockToRetry); + return true; + } + catch (InterruptedException e) { + loadTask.getBlocks().forEach(this::addBlockToRetry); + Thread.currentThread().interrupt(); + // We don't count InterruptedException as task failure + return true; } + } - @Override - public FileInfo next() - { - if (mUsePartialListing && !mFileInfoIterator.hasNext()) { - partialListFileInfos(); - } - return mFileInfoIterator.next(); + @Override + public void updateJob(Job job) { + LoadJob targetJob = (LoadJob) job; + updateBandwidth(targetJob.getBandwidth()); + 
setVerificationEnabled(targetJob.isVerificationEnabled()); + } + + /** + * Loads blocks in a UFS through an Alluxio worker. + */ + public class LoadTask extends Task { + + /** + * @return blocks to load + */ + public List getBlocks() { + return mBlocks; } - private void partialListFileInfos() { - if (!mStartAfter.isEmpty()) { - mListOptions.setDisableAreDescendantsLoadedCheck(true); - } - ListStatusContext context = ListStatusContext.create(ListStatusPartialPOptions.newBuilder() - .setOptions(mListOptions) - .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) - .setStartAfter(mStartAfter)); - listFileInfos(context); - if (mFiles.size() > 0) { - mStartAfter = mFiles.get(mFiles.size() - 1).getPath(); - } + private final List mBlocks; + + /** + * Creates a new instance of {@link LoadTask}. + * + * @param blocks blocks to load + */ + public LoadTask(List blocks) { + mBlocks = blocks; } - private void listFileInfos(ListStatusContext context) { - try { - AuthenticatedClientUser.set(mUser.orElse(null)); - mFiles = mFileSystemMaster.listStatus(new AlluxioURI(mPath), context).stream().filter( - fileInfo -> !fileInfo.isFolder() && fileInfo.isCompleted() - && fileInfo.getInAlluxioPercentage() != 100).collect(Collectors.toList()); - mFileInfoIterator = mFiles.iterator(); - } catch (FileDoesNotExistException | InvalidPathException e) { - throw new NotFoundRuntimeException(e); - } catch (AccessControlException e) { - throw new UnauthenticatedRuntimeException(e); - } catch (IOException e) { - throw AlluxioRuntimeException.from(e); - } finally { - AuthenticatedClientUser.remove(); + @Override + public ListenableFuture run(BlockWorkerClient workerClient) { + LoadRequest.Builder request1 = LoadRequest + .newBuilder() + .addAllBlocks(mBlocks); + UfsReadOptions.Builder options = UfsReadOptions + .newBuilder() + .setTag(mJobId) + .setPositionShort(false); + if (mBandwidth.isPresent()) { + options.setBandwidth(mBandwidth.getAsLong()); } - List fileInfoStream = mFiles - .stream().filter(fileInfo 
-> !mFailedFiles.containsKey(fileInfo.getPath())).collect( - Collectors.toList()); - mTotalFileCount.addAndGet(fileInfoStream.size()); - mTotalByteCount.addAndGet(fileInfoStream.stream() - .map(FileInfo::getFileBlockInfos) - .flatMap(Collection::stream) - .map(FileBlockInfo::getBlockInfo) - .filter(blockInfo -> blockInfo.getLocations().isEmpty()) - .map(BlockInfo::getLength) - .reduce(Long::sum) - .orElse(0L)); + mUser.ifPresent(options::setUser); + LoadRequest request = request1 + .setOptions(options.build()) + .build(); + return workerClient.load(request); } } private static class LoadProgressReport { private final boolean mVerbose; - private final LoadJobState mJobState; + private final JobState mJobState; private final Long mBandwidth; private final boolean mVerificationEnabled; private final long mProcessedFileCount; - private final Long mTotalFileCount; private final long mLoadedByteCount; private final Long mTotalByteCount; private final Long mThroughput; @@ -631,12 +643,10 @@ public LoadProgressReport(LoadJob job, boolean verbose) mVerificationEnabled = job.mVerificationEnabled; mProcessedFileCount = job.mProcessedFileCount.get(); mLoadedByteCount = job.mLoadedByteCount.get(); - if (job.mFileIterator.isPresent() && !job.mFileIterator.get().mUsePartialListing) { - mTotalFileCount = job.mTotalFileCount.get(); + if (!job.mUsePartialListing && job.mFileIterator.isPresent()) { mTotalByteCount = job.mTotalByteCount.get(); } else { - mTotalFileCount = null; mTotalByteCount = null; } long duration = job.getDurationInSec(); @@ -664,7 +674,7 @@ public LoadProgressReport(LoadJob job, boolean verbose) } } - public String getReport(LoadProgressReportFormat format) + public String getReport(JobProgressReportFormat format) { switch (format) { case TEXT: @@ -673,43 +683,41 @@ public String getReport(LoadProgressReportFormat format) return getJsonReport(); default: throw new InvalidArgumentRuntimeException( - String.format("Unknown load progress report format: %s", 
format)); + format("Unknown load progress report format: %s", format)); } } private String getTextReport() { StringBuilder progress = new StringBuilder(); progress.append( - String.format("\tSettings:\tbandwidth: %s\tverify: %s%n", + format("\tSettings:\tbandwidth: %s\tverify: %s%n", mBandwidth == null ? "unlimited" : mBandwidth, mVerificationEnabled)); - progress.append(String.format("\tJob State: %s%s%n", mJobState, + progress.append(format("\tJob State: %s%s%n", mJobState, mFailureReason == null - ? "" : String.format( + ? "" : format( " (%s: %s)", mFailureReason.getClass().getName(), mFailureReason.getMessage()))); if (mVerbose && mFailureReason != null) { for (StackTraceElement stack : mFailureReason.getStackTrace()) { - progress.append(String.format("\t\t%s%n", stack.toString())); + progress.append(format("\t\t%s%n", stack.toString())); } } - progress.append(String.format("\tFiles Processed: %d%s%n", mProcessedFileCount, - mTotalFileCount == null - ? "" : String.format(" out of %s", mTotalFileCount))); - progress.append(String.format("\tBytes Loaded: %s%s%n", + progress.append(format("\tFiles Processed: %d%n", mProcessedFileCount)); + progress.append(format("\tBytes Loaded: %s%s%n", FormatUtils.getSizeFromBytes(mLoadedByteCount), mTotalByteCount == null - ? "" : String.format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); + ? 
"" : format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); if (mThroughput != null) { - progress.append(String.format("\tThroughput: %s/s%n", + progress.append(format("\tThroughput: %s/s%n", FormatUtils.getSizeFromBytes(mThroughput))); } - progress.append(String.format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); - progress.append(String.format("\tFiles Failed: %s%n", mFailedFileCount)); + progress.append(format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); + progress.append(format("\tFiles Failed: %s%n", mFailedFileCount)); if (mVerbose && mFailedFilesWithReasons != null) { mFailedFilesWithReasons.forEach((fileName, reason) -> - progress.append(String.format("\t\t%s: %s%n", fileName, reason))); + progress.append(format("\t\t%s: %s%n", fileName, reason))); } return progress.toString(); } @@ -725,4 +733,18 @@ private String getJsonReport() { } } } + + // metrics + public static final Counter JOB_LOAD_SUCCESS = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); + public static final Counter JOB_LOAD_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_COUNT = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); + public static final Counter JOB_LOAD_BLOCK_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_SIZE = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); + public static final Meter JOB_LOAD_RATE = + MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); } diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java new file mode 100644 index 000000000000..c1a5fc6cd7d5 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJobFactory.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation 
licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.grpc.LoadJobPOptions; +import alluxio.job.LoadJobRequest; +import alluxio.master.file.FileSystemMaster; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobFactory; +import alluxio.security.User; +import alluxio.security.authentication.AuthenticatedClientUser; + +import java.util.Optional; +import java.util.OptionalLong; +import java.util.UUID; + +/** + * Factory for creating {@link LoadJob}s that get file infos from master. + */ +public class LoadJobFactory implements JobFactory { + + private final FileSystemMaster mFsMaster; + private final LoadJobRequest mRequest; + + /** + * Create factory. + * @param request load job request + * @param fsMaster file system master + */ + public LoadJobFactory(LoadJobRequest request, FileSystemMaster fsMaster) { + mFsMaster = fsMaster; + mRequest = request; + } + + @Override + public Job create() { + LoadJobPOptions options = mRequest.getOptions(); + String path = mRequest.getPath(); + OptionalLong bandwidth = + options.hasBandwidth() ? 
OptionalLong.of(options.getBandwidth()) : OptionalLong.empty(); + boolean partialListing = options.hasPartialListing() && options.getPartialListing(); + boolean verificationEnabled = options.hasVerify() && options.getVerify(); + FileIterable fileIterator = new FileIterable(mFsMaster, path, Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName), partialListing, + LoadJob.QUALIFIED_FILE_FILTER); + Optional user = Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName); + return new LoadJob(path, user, UUID.randomUUID().toString(), + bandwidth, + partialListing, + verificationEnabled, fileIterator); + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java b/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java new file mode 100644 index 000000000000..3d7f623999ab --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/scheduler/DefaultWorkerProvider.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.scheduler; + +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.client.file.FileSystemContext; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.master.file.FileSystemMaster; +import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.WorkerProvider; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import java.io.IOException; +import java.util.List; + +/** + * Default worker provider that get worker information from Alluxio master. + */ +public class DefaultWorkerProvider implements WorkerProvider { + private final FileSystemMaster mFileSystemMaster; + private final FileSystemContext mContext; + + /** + * Creates a new instance of {@link DefaultWorkerProvider}. + * + * @param fileSystemMaster the file system master + * @param context the file system context + */ + public DefaultWorkerProvider(FileSystemMaster fileSystemMaster, FileSystemContext context) { + mFileSystemMaster = fileSystemMaster; + mContext = context; + } + + @Override + public List getWorkerInfos() { + try { + // TODO(jianjian): need api for healthy worker instead + return mFileSystemMaster.getWorkerInfoList(); + } catch (UnavailableException e) { + throw new UnavailableRuntimeException( + "fail to get worker infos because master is not available", e); + } + } + + @Override + public CloseableResource getWorkerClient(WorkerNetAddress address) { + try { + return mContext.acquireBlockWorkerClient(address); + } catch (IOException e) { + throw AlluxioRuntimeException.from(e); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java b/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java new file mode 100644 index 000000000000..1b262fb0449f --- /dev/null +++ 
b/core/server/master/src/main/java/alluxio/master/scheduler/JournaledJobMetaStore.java @@ -0,0 +1,89 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.scheduler; + +import alluxio.collections.ConcurrentHashSet; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.exception.status.UnavailableException; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.JobFactoryProducer; +import alluxio.master.journal.JournalContext; +import alluxio.master.journal.Journaled; +import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.proto.journal.Journal; +import alluxio.resource.CloseableIterator; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobMetaStore; + +import com.google.common.collect.Iterators; + +import java.util.Set; + +/** + * A journaled job meta store. + */ +public class JournaledJobMetaStore implements JobMetaStore, Journaled { + private final FileSystemMaster mFileSystemMaster; + private final Set> + mExistingJobs = new ConcurrentHashSet<>(); + + /** + * Creates a new instance of {@link JournaledJobMetaStore}. 
+ * @param fileSystemMaster the file system master + */ + public JournaledJobMetaStore(FileSystemMaster fileSystemMaster) { + mFileSystemMaster = fileSystemMaster; + } + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable( + Iterators.transform(mExistingJobs.iterator(), Job::toJournalEntry)); + } + + @Override + public boolean processJournalEntry(Journal.JournalEntry entry) { + if (!entry.hasLoadJob()) { + return false; + } + Job job = JobFactoryProducer + .create(entry, mFileSystemMaster).create(); + mExistingJobs.add(job); + return true; + } + + @Override + public void resetState() { + mExistingJobs.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.SCHEDULER; + } + + @Override + public void updateJob(Job job) { + try (JournalContext context = mFileSystemMaster.createJournalContext()) { + context.append(job.toJournalEntry()); + mExistingJobs.add(job); + } catch (UnavailableException e) { + throw new UnavailableRuntimeException( + "There is an ongoing backup running, please submit later", e); + } + } + + @Override + public Set> getJobs() { + return mExistingJobs; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java new file mode 100644 index 000000000000..ee548d8bd436 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java @@ -0,0 +1,402 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.scheduler; + +import static java.lang.String.format; + +import alluxio.Constants; +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.exception.runtime.UnavailableRuntimeException; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.job.JobDescription; +import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobMetaStore; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; +import alluxio.scheduler.job.WorkerProvider; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.ThreadUtils; +import alluxio.wire.WorkerInfo; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +/** + * The Scheduler which controls jobs. It is responsible for managing active workers, updating jobs + * and update job information to job meta store. + * The workflow is: + * 1. Submit a job to the scheduler. + * 2. The scheduler will pull the task from the job and assign the task to a worker. + * 3. 
The worker will execute the task and report the result to the job. + * 4. The job will update the progress. And schedule the next task if the job is not done. + * 5. One worker would have one task running for one job description at a time. + */ +@ThreadSafe +public final class Scheduler { + + private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class); + private static final int CAPACITY = 100; + private static final long WORKER_UPDATE_INTERVAL = Configuration.getMs( + PropertyKey.MASTER_WORKER_INFO_CACHE_REFRESH_TIME); + private static final int EXECUTOR_SHUTDOWN_MS = 10 * Constants.SECOND_MS; + private final Map> + mExistingJobs = new ConcurrentHashMap<>(); + private final Map, Set> mRunningTasks = new ConcurrentHashMap<>(); + private final JobMetaStore mJobMetaStore; + // initial thread in start method since we would stop and start thread when gainPrimacy + private ScheduledExecutorService mSchedulerExecutor; + private volatile boolean mRunning = false; + private Map> mActiveWorkers = ImmutableMap.of(); + private final WorkerProvider mWorkerProvider; + + /** + * Constructor. + * + * @param workerProvider workerProvider + * @param jobMetaStore jobMetaStore + */ + public Scheduler(WorkerProvider workerProvider, JobMetaStore jobMetaStore) { + mWorkerProvider = workerProvider; + mJobMetaStore = jobMetaStore; + } + + /** + * Start scheduler. 
+ */ + public void start() { + if (!mRunning) { + retrieveJobs(); + mSchedulerExecutor = Executors.newSingleThreadScheduledExecutor( + ThreadFactoryUtils.build("scheduler", false)); + mSchedulerExecutor.scheduleAtFixedRate(this::updateWorkers, 0, WORKER_UPDATE_INTERVAL, + TimeUnit.MILLISECONDS); + mSchedulerExecutor.scheduleWithFixedDelay(this::processJobs, 0, 100, TimeUnit.MILLISECONDS); + mSchedulerExecutor.scheduleWithFixedDelay(this::cleanupStaleJob, 1, 1, TimeUnit.HOURS); + mRunning = true; + } + } + + private void retrieveJobs() { + for (Job job : mJobMetaStore.getJobs()) { + mExistingJobs.put(job.getDescription(), job); + if (job.isDone()) { + mRunningTasks.remove(job); + } + else { + mRunningTasks.put(job, new HashSet<>()); + } + } + } + + /** + * Stop scheduler. + */ + public void stop() { + if (mRunning) { + mActiveWorkers.values().forEach(CloseableResource::close); + mActiveWorkers = ImmutableMap.of(); + ThreadUtils.shutdownAndAwaitTermination(mSchedulerExecutor, EXECUTOR_SHUTDOWN_MS); + mRunning = false; + } + } + + /** + * Submit a job. 
+ * @param job the job + * @return true if the job is new, false if the job has already been submitted + * @throws ResourceExhaustedRuntimeException if the job cannot be submitted because the scheduler + * is at capacity + * @throws UnavailableRuntimeException if the job cannot be submitted because the meta store is + * not ready + */ + public boolean submitJob(Job job) { + Job existingJob = mExistingJobs.get(job.getDescription()); + if (existingJob != null && !existingJob.isDone()) { + updateExistingJob(job, existingJob); + return false; + } + + if (mRunningTasks.size() >= CAPACITY) { + throw new ResourceExhaustedRuntimeException( + "Too many jobs running, please submit later.", true); + } + mJobMetaStore.updateJob(job); + mExistingJobs.put(job.getDescription(), job); + mRunningTasks.put(job, new HashSet<>()); + LOG.debug(format("start job: %s", job)); + return true; + } + + private void updateExistingJob(Job newJob, Job existingJob) { + existingJob.updateJob(newJob); + mJobMetaStore.updateJob(existingJob); + LOG.debug(format("updated existing job: %s from %s", existingJob, newJob)); + if (existingJob.getJobState() == JobState.STOPPED) { + existingJob.setJobState(JobState.RUNNING); + mRunningTasks.put(existingJob, new HashSet<>()); + LOG.debug(format("restart existing job: %s", existingJob)); + } + } + + /** + * Stop a job. + * @param jobDescription job identifier + * @return true if the job is stopped, false if the job does not exist or has already finished + */ + public boolean stopJob(JobDescription jobDescription) { + Job existingJob = mExistingJobs.get(jobDescription); + if (existingJob != null && existingJob.isRunning()) { + existingJob.setJobState(JobState.STOPPED); + mJobMetaStore.updateJob(existingJob); + // leftover tasks in mRunningTasks would be removed by scheduling thread. + return true; + } + return false; + } + + /** + * Get the job's progress report. 
+ * @param jobDescription job identifier + * @param format progress report format + * @param verbose whether to include details on failed files and failures + * @return the progress report + * @throws NotFoundRuntimeException if the job does not exist + * @throws AlluxioRuntimeException if any other Alluxio exception occurs + */ + public String getJobProgress( + JobDescription jobDescription, + JobProgressReportFormat format, + boolean verbose) { + Job job = mExistingJobs.get(jobDescription); + if (job == null) { + throw new NotFoundRuntimeException(format("%s cannot be found.", jobDescription)); + } + return job.getProgress(format, verbose); + } + + /** + * Get active workers. + * @return active workers + */ + @VisibleForTesting + public Map> getActiveWorkers() { + return mActiveWorkers; + } + + /** + * Removes all finished jobs outside the retention time. + */ + @VisibleForTesting + public void cleanupStaleJob() { + long current = System.currentTimeMillis(); + mExistingJobs + .entrySet().removeIf(job -> !job.getValue().isRunning() + && job.getValue().getEndTime().isPresent() + && job.getValue().getEndTime().getAsLong() <= (current - Configuration.getMs( + PropertyKey.JOB_RETENTION_TIME))); + } + + /** + * Refresh active workers. 
+ */ + @VisibleForTesting + public void updateWorkers() { + if (Thread.currentThread().isInterrupted()) { + return; + } + Set workerInfos; + try { + try { + workerInfos = ImmutableSet.copyOf(mWorkerProvider.getWorkerInfos()); + } catch (AlluxioRuntimeException e) { + LOG.warn("Failed to get worker info, using existing worker infos of {} workers", + mActiveWorkers.size()); + return; + } + if (workerInfos.size() == mActiveWorkers.size() + && workerInfos.containsAll(mActiveWorkers.keySet())) { + return; + } + + ImmutableMap.Builder> updatedWorkers = + ImmutableMap.builder(); + for (WorkerInfo workerInfo : workerInfos) { + if (mActiveWorkers.containsKey(workerInfo)) { + updatedWorkers.put(workerInfo, mActiveWorkers.get(workerInfo)); + } + else { + try { + updatedWorkers.put(workerInfo, + mWorkerProvider.getWorkerClient(workerInfo.getAddress())); + } catch (AlluxioRuntimeException e) { + // skip the worker if we cannot obtain a client + } + } + } + // Close clients connecting to lost workers + for (Map.Entry> entry : + mActiveWorkers.entrySet()) { + WorkerInfo workerInfo = entry.getKey(); + if (!workerInfos.contains(workerInfo)) { + CloseableResource resource = entry.getValue(); + resource.close(); + LOG.debug("Closed BlockWorkerClient to lost worker {}", workerInfo); + } + } + // Build the clients to the current active worker list + mActiveWorkers = updatedWorkers.build(); + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. + LOG.error("Unexpected exception thrown in updateWorkers.", e); + } + } + + /** + * Get jobs. 
+ * + * @return jobs + */ + @VisibleForTesting + public Map> getJobs() { + return mExistingJobs; + } + + private void processJobs() { + if (Thread.currentThread().isInterrupted()) { + return; + } + mRunningTasks.forEach(this::processJob); + } + + private void processJob(Job job, Set runningWorkers) { + try { + if (!job.isRunning()) { + try { + mJobMetaStore.updateJob(job); + } + catch (UnavailableRuntimeException e) { + // This should not happen because the scheduler should not be started while master is + // still processing journal entries. However, if it does happen, we don't want to throw + // exception in a task running on scheduler thead. So just ignore it and hopefully later + // retry will work. + LOG.error("error writing to journal when processing job", e); + } + mRunningTasks.remove(job); + return; + } + if (!job.isHealthy()) { + job.failJob(new InternalRuntimeException("Job failed because it's not healthy.")); + return; + } + + // If there are new workers, schedule job onto new workers + mActiveWorkers.forEach((workerInfo, workerClient) -> { + if (!runningWorkers.contains(workerInfo) && scheduleTask(job, workerInfo, runningWorkers, + workerClient)) { + runningWorkers.add(workerInfo); + } + }); + + if (runningWorkers.isEmpty() && job.isCurrentPassDone()) { + if (job.needVerification()) { + job.initiateVerification(); + } + else { + if (job.isHealthy()) { + job.setJobState(JobState.SUCCEEDED); + } + else { + job.failJob(new InternalRuntimeException("Job failed because it's not healthy.")); + } + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. 
+ LOG.error("Unexpected exception thrown in processJob.", e); + job.failJob(new InternalRuntimeException(e)); + } + } + + private boolean scheduleTask( + @SuppressWarnings("rawtypes") Job job, + WorkerInfo workerInfo, + Set livingWorkers, + CloseableResource workerClient) { + if (!job.isRunning()) { + return false; + } + Optional> task; + try { + task = job.getNextTask(workerInfo); + } catch (AlluxioRuntimeException e) { + LOG.warn(format("error getting next task for job %s", job), e); + if (!e.isRetryable()) { + job.failJob(e); + } + return false; + } + if (!task.isPresent()) { + return false; + } + Task currentTask = task.get(); + currentTask.execute(workerClient.get()); + currentTask.getResponseFuture().addListener(() -> { + try { + if (!job.processResponse(currentTask)) { + livingWorkers.remove(workerInfo); + } + // Schedule next batch for healthy job + if (job.isHealthy()) { + if (mActiveWorkers.containsKey(workerInfo)) { + if (!scheduleTask(job, workerInfo, livingWorkers, mActiveWorkers.get(workerInfo))) { + livingWorkers.remove(workerInfo); + } + } + else { + livingWorkers.remove(workerInfo); + } + } + } catch (Exception e) { + // Unknown exception. This should not happen, but if it happens we don't want to lose the + // scheduler thread, thus catching it here. Any exception surfaced here should be properly + // handled. 
+ LOG.error("Unexpected exception thrown in response future listener.", e); + job.failJob(new InternalRuntimeException(e)); + } + }, mSchedulerExecutor); + return true; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java new file mode 100644 index 000000000000..67c8531ad6bd --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/FileIterableTest.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.scheduler; + +import static org.junit.Assert.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +import alluxio.exception.AccessControlException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.exception.runtime.UnauthenticatedRuntimeException; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; + +import org.junit.Test; + +import java.io.IOException; +import java.util.Optional; + +public class FileIterableTest { + + @Test + public void testException() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + String path = "test"; + doThrow(new FileDoesNotExistException(path)).when(fileSystemMaster).checkAccess(any(), any()); + FileIterable fileIterable = new FileIterable(fileSystemMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertThrows(NotFoundRuntimeException.class, fileIterable::iterator); + doThrow(new InvalidPathException(path)).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(NotFoundRuntimeException.class, fileIterable::iterator); + doThrow(new AccessControlException(path)).when(fileSystemMaster).checkAccess(any(), any()); + assertThrows(UnauthenticatedRuntimeException.class, fileIterable::iterator); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java similarity index 62% rename from core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java rename to 
core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java index 0efb37818c35..4ddf1246e8f2 100644 --- a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadJobTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java @@ -9,9 +9,9 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.master.file.loadmanager; +package alluxio.master.file.scheduler; -import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomFileInfo; +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomFileInfo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -26,8 +26,11 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.runtime.InternalRuntimeException; import alluxio.grpc.Block; -import alluxio.grpc.LoadProgressReportFormat; +import alluxio.grpc.JobProgressReportFormat; import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; +import alluxio.scheduler.job.JobState; import alluxio.wire.FileInfo; import com.google.common.collect.ImmutableSet; @@ -35,6 +38,7 @@ import java.io.IOException; import java.util.List; +import java.util.Optional; import java.util.OptionalLong; public class LoadJobTest { @@ -42,31 +46,37 @@ public class LoadJobTest { public void testGetNextBatch() throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { List fileInfos = generateRandomFileInfo(5, 20, 64 * Constants.MB); + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); - when(fileSystemMaster.listStatus(any(), any())) - .thenReturn(fileInfos); - LoadJob load = new LoadJob("test", "user", OptionalLong.empty()); - List batch = load.getNextBatch(fileSystemMaster, 10); + when(fileSystemMaster.listStatus(any(), 
any())).thenReturn(fileInfos); + String testPath = "test"; + Optional user = Optional.of("user"); + FileIterable files = + new FileIterable(fileSystemMaster, testPath, user, false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob load = + new LoadJob(testPath, user, "1", OptionalLong.empty(), false, false, files); + List batch = load.getNextBatchBlocks(10); assertEquals(10, batch.size()); assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); batch.forEach(load::addBlockToRetry); - batch = load.getNextBatch(fileSystemMaster, 80); + batch = load.getNextBatchBlocks(80); assertEquals(80, batch.size()); assertEquals(5, batch.stream().map(Block::getUfsPath).distinct().count()); - batch = load.getNextBatch(fileSystemMaster, 80); + batch = load.getNextBatchBlocks(80); assertEquals(10, batch.size()); assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); - batch = load.getNextBatch(fileSystemMaster, 80); + batch = load.getNextBatchBlocks(80); assertEquals(10, batch.size()); assertEquals(1, batch.stream().map(Block::getUfsPath).distinct().count()); assertEquals(ImmutableSet.of(fileInfos.get(0).getUfsPath()), batch.stream().map(Block::getUfsPath).collect(ImmutableSet.toImmutableSet())); - batch = load.getNextBatch(fileSystemMaster, 80); + batch = load.getNextBatchBlocks(80); assertEquals(0, batch.size()); } @@ -75,16 +85,18 @@ public void testIsHealthy() throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { List fileInfos = generateRandomFileInfo(100, 5, 64 * 1024 * 1024); FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); - when(fileSystemMaster.listStatus(any(), any())) - .thenReturn(fileInfos); - LoadJob loadJob = new LoadJob("test", "user", OptionalLong.empty()); - List batch = loadJob.getNextBatch(fileSystemMaster, 100); + when(fileSystemMaster.listStatus(any(), any())).thenReturn(fileInfos); + FileIterable files = new FileIterable(fileSystemMaster, "test", Optional.of("user"), 
false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = + new LoadJob("test", Optional.of("user"), "1", OptionalLong.empty(), false, false, files); + List batch = loadJob.getNextBatchBlocks(100); assertTrue(loadJob.isHealthy()); - loadJob.getNextBatch(fileSystemMaster, 100); + loadJob.getNextBatchBlocks(100); assertTrue(loadJob.isHealthy()); batch.forEach(loadJob::addBlockToRetry); assertTrue(loadJob.isHealthy()); - batch = loadJob.getNextBatch(fileSystemMaster, 100); + batch = loadJob.getNextBatchBlocks(100); assertTrue(loadJob.isHealthy()); batch.forEach(loadJob::addBlockToRetry); assertFalse(loadJob.isHealthy()); @@ -94,47 +106,50 @@ public void testIsHealthy() public void testLoadProgressReport() throws Exception { List fileInfos = generateRandomFileInfo(10, 10, 64 * Constants.MB); FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); - when(fileSystemMaster.listStatus(any(), any())) - .thenReturn(fileInfos); - LoadJob job = spy(new LoadJob("/test", "user", OptionalLong.empty())); + when(fileSystemMaster.listStatus(any(), any())).thenReturn(fileInfos); + FileIterable files = new FileIterable(fileSystemMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = + spy(new LoadJob("test", Optional.of("user"), "1", OptionalLong.empty(), false, false, + files)); when(job.getDurationInSec()).thenReturn(0L); - job.setJobState(LoadJobState.LOADING); - List blocks = job.getNextBatch(fileSystemMaster, 25); + job.setJobState(JobState.RUNNING); + List blocks = job.getNextBatchBlocks(25); job.addLoadedBytes(640 * Constants.MB); String expectedTextReport = "\tSettings:\tbandwidth: unlimited\tverify: false\n" - + "\tJob State: LOADING\n" - + "\tFiles Processed: 3 out of 10\n" - + "\tBytes Loaded: 640.00MB out of 6.25GB\n" + + "\tJob State: RUNNING\n" + + "\tFiles Processed: 3\n" + + "\tBytes Loaded: 640.00MB out of 1600.00MB\n" + "\tBlock load failure rate: 0.00%\n" + "\tFiles Failed: 0\n"; - assertEquals(expectedTextReport, 
job.getProgress(LoadProgressReportFormat.TEXT, false)); - assertEquals(expectedTextReport, job.getProgress(LoadProgressReportFormat.TEXT, true)); - String expectedJsonReport = "{\"mVerbose\":false,\"mJobState\":\"LOADING\"," - + "\"mVerificationEnabled\":false,\"mProcessedFileCount\":3,\"mTotalFileCount\":10," - + "\"mLoadedByteCount\":671088640,\"mTotalByteCount\":6710886400," + assertEquals(expectedTextReport, job.getProgress(JobProgressReportFormat.TEXT, false)); + assertEquals(expectedTextReport, job.getProgress(JobProgressReportFormat.TEXT, true)); + String expectedJsonReport = "{\"mVerbose\":false,\"mJobState\":\"RUNNING\"," + + "\"mVerificationEnabled\":false,\"mProcessedFileCount\":3," + + "\"mLoadedByteCount\":671088640,\"mTotalByteCount\":1677721600," + "\"mFailurePercentage\":0.0,\"mFailedFileCount\":0}"; - assertEquals(expectedJsonReport, job.getProgress(LoadProgressReportFormat.JSON, false)); + assertEquals(expectedJsonReport, job.getProgress(JobProgressReportFormat.JSON, false)); job.addBlockFailure(blocks.get(0), "Test error 1", 2); job.addBlockFailure(blocks.get(4), "Test error 2", 2); job.addBlockFailure(blocks.get(10), "Test error 3", 2); job.failJob(new InternalRuntimeException("test")); String expectedTextReportWithError = "\tSettings:\tbandwidth: unlimited\tverify: false\n" + "\tJob State: FAILED (alluxio.exception.runtime.InternalRuntimeException: test)\n" - + "\tFiles Processed: 3 out of 10\n" - + "\tBytes Loaded: 640.00MB out of 6.25GB\n" + + "\tFiles Processed: 3\n" + + "\tBytes Loaded: 640.00MB out of 1600.00MB\n" + "\tBlock load failure rate: 12.00%\n" + "\tFiles Failed: 2\n"; assertEquals(expectedTextReportWithError, - job.getProgress(LoadProgressReportFormat.TEXT, false)); - String textReport = job.getProgress(LoadProgressReportFormat.TEXT, true); + job.getProgress(JobProgressReportFormat.TEXT, false)); + String textReport = job.getProgress(JobProgressReportFormat.TEXT, true); assertFalse(textReport.contains("Test error 1")); 
assertTrue(textReport.contains("Test error 2")); assertTrue(textReport.contains("Test error 3")); - String jsonReport = job.getProgress(LoadProgressReportFormat.JSON, false); + String jsonReport = job.getProgress(JobProgressReportFormat.JSON, false); assertTrue(jsonReport.contains("FAILED")); assertTrue(jsonReport.contains("mFailureReason")); assertFalse(jsonReport.contains("Test error 2")); - jsonReport = job.getProgress(LoadProgressReportFormat.JSON, true); + jsonReport = job.getProgress(JobProgressReportFormat.JSON, true); assertFalse(jsonReport.contains("Test error 1")); assertTrue(jsonReport.contains("Test error 2")); assertTrue(jsonReport.contains("Test error 3")); diff --git a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java similarity index 97% rename from core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java rename to core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java index 4b610c6cfe47..de299ea03d97 100644 --- a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.master.file.loadmanager; +package alluxio.master.file.scheduler; import alluxio.grpc.Block; import alluxio.grpc.BlockStatus; @@ -102,7 +102,8 @@ private static FileInfo createFileInfo(int blockCount, long blockSizeLimit) { .stream() .map(id -> LoadTestUtils.createFileBlockInfo(id, blockSizeLimit)) .collect(ImmutableList.toImmutableList())) - .setCompleted(true); + .setCompleted(true) + .setPersisted(true); return info; } diff --git a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java similarity index 52% rename from core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java rename to core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java index 89ea0057d4c4..5c91987b1d5e 100644 --- a/core/server/master/src/test/java/alluxio/master/file/loadmanager/LoadManagerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/SchedulerTest.java @@ -9,18 +9,17 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.master.file.loadmanager; +package alluxio.master.file.scheduler; -import static alluxio.master.file.loadmanager.LoadTestUtils.fileWithBlockLocations; -import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomBlockStatus; -import static alluxio.master.file.loadmanager.LoadTestUtils.generateRandomFileInfo; +import static alluxio.master.file.scheduler.LoadTestUtils.fileWithBlockLocations; +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomBlockStatus; +import static alluxio.master.file.scheduler.LoadTestUtils.generateRandomFileInfo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -33,27 +32,31 @@ import alluxio.conf.PropertyKey; import alluxio.conf.Source; import alluxio.exception.AccessControlException; -import alluxio.exception.FileDoesNotExistException; -import alluxio.exception.InvalidPathException; -import alluxio.exception.runtime.NotFoundRuntimeException; import alluxio.exception.runtime.ResourceExhaustedRuntimeException; -import alluxio.exception.runtime.UnauthenticatedRuntimeException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.BlockStatus; -import alluxio.grpc.LoadProgressReportFormat; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.LoadRequest; import alluxio.grpc.LoadResponse; import alluxio.grpc.TaskStatus; +import alluxio.job.JobDescription; import alluxio.master.file.FileSystemMaster; +import alluxio.master.job.FileIterable; +import alluxio.master.job.LoadJob; import alluxio.master.journal.JournalContext; +import 
alluxio.master.scheduler.DefaultWorkerProvider; +import alluxio.master.scheduler.JournaledJobMetaStore; +import alluxio.master.scheduler.Scheduler; import alluxio.proto.journal.Job; import alluxio.resource.CloseableResource; +import alluxio.scheduler.job.JobState; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.wire.FileInfo; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.SettableFuture; import io.grpc.Status; import org.junit.AfterClass; @@ -68,7 +71,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.IntStream; -public final class LoadManagerTest { +public final class SchedulerTest { @BeforeClass public static void before() { @@ -82,11 +85,13 @@ public static void after() { @Test public void testGetActiveWorkers() throws IOException { - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); CloseableResource blockWorkerClient = mock(CloseableResource.class); - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); - when(fileSystemMaster.getWorkerInfoList()) + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + when(fsMaster.getWorkerInfoList()) .thenReturn(ImmutableList.of( new WorkerInfo().setId(1).setAddress( new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), @@ -102,72 +107,90 @@ public void testGetActiveWorkers() throws IOException { new WorkerInfo().setId(2).setAddress( new WorkerNetAddress().setHost("worker2").setRpcPort(1234)))); when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClient); - 
assertEquals(0, loadManager.getActiveWorkers().size()); - loadManager.updateWorkers(); - assertEquals(2, loadManager.getActiveWorkers().size()); - loadManager.updateWorkers(); - assertEquals(2, loadManager.getActiveWorkers().size()); - loadManager.updateWorkers(); - assertEquals(1, loadManager.getActiveWorkers().size()); - loadManager.updateWorkers(); - assertEquals(2, loadManager.getActiveWorkers().size()); + assertEquals(0, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(1, scheduler + .getActiveWorkers().size()); + scheduler.updateWorkers(); + assertEquals(2, scheduler + .getActiveWorkers().size()); } @Test public void testSubmit() throws Exception { String validLoadPath = "/path/to/load"; - String invalidLoadPath = "/path/to/invalid"; - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); - assertTrue(loadManager.submitLoad(validLoadPath, OptionalLong.empty(), false, true)); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + FileIterable files = + new FileIterable(fsMaster, validLoadPath, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.empty(), false, true, + files); + 
assertTrue(scheduler.submitJob(loadJob)); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED && !journalEntry.getLoadJob().hasBandwidth() && journalEntry.getLoadJob().getVerify())); - assertEquals(1, loadManager.getLoadJobs().size()); - assertEquals(OptionalLong.empty(), loadManager.getLoadJobs().get(validLoadPath).getBandwidth()); - assertTrue(loadManager.getLoadJobs().get(validLoadPath).isVerificationEnabled()); - assertFalse(loadManager.submitLoad(validLoadPath, OptionalLong.of(1000), true, false)); + assertEquals(1, scheduler + .getJobs().size()); + LoadJob job = (LoadJob) scheduler.getJobs().get(loadJob.getDescription()); + assertEquals(OptionalLong.empty(), job.getBandwidth()); + assertTrue(job.isVerificationEnabled()); + loadJob = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.of(1000), true, false, + files); + assertFalse(scheduler.submitJob(loadJob)); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED && journalEntry.getLoadJob().getBandwidth() == 1000 && !journalEntry.getLoadJob().getPartialListing() // we don't update partialListing && !journalEntry.getLoadJob().getVerify())); - assertEquals(1, loadManager.getLoadJobs().size()); - assertEquals(1000, loadManager.getLoadJobs().get(validLoadPath).getBandwidth().getAsLong()); - assertFalse(loadManager.getLoadJobs().get(validLoadPath).isVerificationEnabled()); - doThrow(new FileDoesNotExistException("test")).when(fileSystemMaster).checkAccess(any(), any()); - assertThrows(NotFoundRuntimeException.class, - () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); - doThrow(new InvalidPathException("test")).when(fileSystemMaster).checkAccess(any(), 
any()); - assertThrows(NotFoundRuntimeException.class, - () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); - doThrow(new AccessControlException("test")).when(fileSystemMaster).checkAccess(any(), any()); - assertThrows(UnauthenticatedRuntimeException.class, - () -> loadManager.submitLoad(invalidLoadPath, OptionalLong.empty(), false, true)); + assertEquals(1, scheduler + .getJobs().size()); + job = (LoadJob) scheduler.getJobs().get(loadJob.getDescription()); + assertEquals(1000, job.getBandwidth().getAsLong()); + assertFalse(job.isVerificationEnabled()); } @Test public void testStop() throws Exception { String validLoadPath = "/path/to/load"; - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); - assertTrue(loadManager.submitLoad(validLoadPath, OptionalLong.of(100), false, true)); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + FileIterable files = + new FileIterable(fsMaster, validLoadPath, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = + new LoadJob(validLoadPath, Optional.of("user"), "1", OptionalLong.of(100), false, true, + files); + assertTrue(scheduler.submitJob(job)); verify(journalContext, times(1)).append(any()); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED && 
journalEntry.getLoadJob().getBandwidth() == 100 && journalEntry.getLoadJob().getVerify())); - assertTrue(loadManager.stopLoad(validLoadPath)); + assertTrue(scheduler.stopJob(job.getDescription())); verify(journalContext, times(2)).append(any()); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() && journalEntry.getLoadJob().getLoadPath().equals(validLoadPath) @@ -175,40 +198,51 @@ public void testStop() throws Exception { && journalEntry.getLoadJob().getBandwidth() == 100 && journalEntry.getLoadJob().getVerify() && journalEntry.getLoadJob().hasEndTime())); - assertFalse(loadManager.stopLoad(validLoadPath)); + assertFalse(scheduler.stopJob(job.getDescription())); verify(journalContext, times(2)).append(any()); - assertFalse(loadManager.stopLoad("/does/not/exist")); + assertFalse(scheduler.stopJob(JobDescription.newBuilder().setPath("/does/not/exist").build())); verify(journalContext, times(2)).append(any()); - assertFalse(loadManager.submitLoad(validLoadPath, OptionalLong.of(100), false, true)); + assertFalse(scheduler.submitJob(job)); verify(journalContext, times(3)).append(any()); - assertTrue(loadManager.stopLoad(validLoadPath)); + assertTrue(scheduler.stopJob(job.getDescription())); verify(journalContext, times(4)).append(any()); } @Test public void testSubmitExceedsCapacity() throws Exception { - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new 
JournaledJobMetaStore(fsMaster)); IntStream.range(0, 100).forEach( - i -> assertTrue(loadManager.submitLoad( - String.format("/path/to/load/%d", i), OptionalLong.empty(), false, true))); - assertThrows( - ResourceExhaustedRuntimeException.class, - () -> loadManager.submitLoad("/path/to/load/101", OptionalLong.empty(), false, true)); + i -> { + String path = String.format("/path/to/load/%d", i); + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertTrue(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", OptionalLong.empty(), false, true, + files))); + }); + FileIterable files = + new FileIterable(fsMaster, "/path/to/load/101", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + assertThrows(ResourceExhaustedRuntimeException.class, () -> scheduler.submitJob( + new LoadJob("/path/to/load/101", Optional.of("user"), "1", OptionalLong.empty(), false, + true, files))); } @Test public void testScheduling() throws Exception { - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + when(fsMaster.createJournalContext()).thenReturn(journalContext); CloseableResource blockWorkerClientResource = mock(CloseableResource.class); BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); - when(fileSystemMaster.getWorkerInfoList()) + when(fsMaster.getWorkerInfoList()) .thenReturn(ImmutableList.of( new WorkerInfo().setId(1).setAddress( new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), @@ -239,88 +273,97 @@ public void testScheduling() throws Exception { new WorkerInfo().setId(10).setAddress( new WorkerNetAddress().setHost("worker10").setRpcPort(1234)))); List fileInfos = 
generateRandomFileInfo(100, 50, 64 * Constants.MB); - when(fileSystemMaster.listStatus(any(), any())) + when(fsMaster.listStatus(any(), any())) .thenReturn(fileInfos) .thenReturn(fileWithBlockLocations(fileInfos, 0.95)) .thenReturn(fileWithBlockLocations(fileInfos, 1.1)); - int failureRequestIteration = 50; - int exceptionRequestIteration = 70; - AtomicInteger iteration = new AtomicInteger(); - when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); + AtomicInteger iteration = new AtomicInteger(); when(blockWorkerClient.load(any())).thenAnswer(invocation -> { - iteration.getAndIncrement(); LoadRequest request = invocation.getArgument(0); - List status; - if (iteration.get() == exceptionRequestIteration) { - // Test worker exception - SettableFuture responseFuture = SettableFuture.create(); - responseFuture.setException(new TimeoutException()); - return responseFuture; - } - else if (iteration.get() == failureRequestIteration) { - // Test worker failing the whole request - status = generateRandomBlockStatus(request.getBlocksList(), 1); - } - else { - status = generateRandomBlockStatus(request.getBlocksList(), 0.01); - } - LoadResponse.Builder response = LoadResponse.newBuilder(); - if (status.stream().allMatch(s -> s.getCode() == Status.OK.getCode().value())) { - response.setStatus(TaskStatus.SUCCESS); - } - else if (status.stream().allMatch(s -> s.getCode() != Status.OK.getCode().value())) { - response.setStatus(TaskStatus.FAILURE) - .addAllBlockStatus(status); - } - else { - response.setStatus(TaskStatus.PARTIAL_FAILURE) - .addAllBlockStatus(status.stream() - .filter(s -> s.getCode() != Status.OK.getCode().value()) - .collect(ImmutableList.toImmutableList())); - } - SettableFuture responseFuture = SettableFuture.create(); - responseFuture.set(response.build()); - return responseFuture; + return buildResponseFuture(request, iteration); }); - - LoadManager 
loadManager = new LoadManager(fileSystemMaster, fileSystemContext); - LoadJob loadJob = new LoadJob("test", Optional.of("user"), "1", - OptionalLong.of(1000), false, true); - loadManager.submitLoad(loadJob); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + String path = "test"; + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob loadJob = new LoadJob(path, Optional.of("user"), "1", + OptionalLong.of(1000), false, true, files); + scheduler.submitJob(loadJob); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() - && journalEntry.getLoadJob().getLoadPath().equals("test") + && journalEntry.getLoadJob().getLoadPath().equals(path) && journalEntry.getLoadJob().getState() == Job.PJobState.CREATED && journalEntry.getLoadJob().getBandwidth() == 1000 && journalEntry.getLoadJob().getVerify())); - loadManager.start(); - while (!loadManager - .getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + + scheduler.start(); + while (!scheduler + .getJobProgress(loadJob.getDescription(), JobProgressReportFormat.TEXT, false) .contains("SUCCEEDED")) { - assertFalse(loadManager.submitLoad( - new LoadJob("test", Optional.of("user"), "1", OptionalLong.of(1000), false, true))); + assertFalse(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", OptionalLong.of(1000), false, true, files))); Thread.sleep(1000); } Thread.sleep(1000); - loadManager.stop(); - assertEquals(LoadJobState.SUCCEEDED, loadJob.getJobState()); + scheduler.stop(); + assertEquals(JobState.SUCCEEDED, loadJob.getJobState()); assertEquals(0, loadJob.getCurrentBlockCount()); - assertTrue(loadJob.getTotalBlockCount() > 5000); verify(journalContext).append(argThat(journalEntry -> journalEntry.hasLoadJob() - && journalEntry.getLoadJob().getLoadPath().equals("test") 
+ && journalEntry.getLoadJob().getLoadPath().equals(path) && journalEntry.getLoadJob().getState() == Job.PJobState.SUCCEEDED && journalEntry.getLoadJob().getBandwidth() == 1000 && journalEntry.getLoadJob().getVerify())); - assertTrue(loadManager.submitLoad(new LoadJob("test", "user", OptionalLong.of(1000)))); + assertTrue(scheduler.submitJob(new LoadJob(path, "user", OptionalLong.of(1000), files))); + } + + private ListenableFuture buildResponseFuture(LoadRequest request, + AtomicInteger iteration) { + int failureRequestIteration = 50; + int exceptionRequestIteration = 70; + + iteration.getAndIncrement(); + List status; + if (iteration.get() == exceptionRequestIteration) { + // Test worker exception + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.setException(new TimeoutException()); + return responseFuture; + } + else if (iteration.get() == failureRequestIteration) { + // Test worker failing the whole request + status = generateRandomBlockStatus(request.getBlocksList(), 1); + } + else { + status = generateRandomBlockStatus(request.getBlocksList(), 0.01); + } + LoadResponse.Builder response = LoadResponse.newBuilder(); + if (status.stream().allMatch(s -> s.getCode() == Status.OK.getCode().value())) { + response.setStatus(TaskStatus.SUCCESS); + } + else if (status.stream().allMatch(s -> s.getCode() != Status.OK.getCode().value())) { + response.setStatus(TaskStatus.FAILURE) + .addAllBlockStatus(status); + } + else { + response.setStatus(TaskStatus.PARTIAL_FAILURE) + .addAllBlockStatus(status.stream() + .filter(s -> s.getCode() != Status.OK.getCode().value()) + .collect(ImmutableList.toImmutableList())); + } + SettableFuture responseFuture = SettableFuture.create(); + responseFuture.set(response.build()); + return responseFuture; } @Test public void testSchedulingFullCapacity() throws Exception { - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext 
fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + when(fsMaster.createJournalContext()).thenReturn(journalContext); CloseableResource blockWorkerClientResource = mock(CloseableResource.class); BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); ImmutableList.Builder workerInfos = ImmutableList.builder(); @@ -328,10 +371,10 @@ public void testSchedulingFullCapacity() throws Exception { workerInfos.add(new WorkerInfo().setId(i).setAddress( new WorkerNetAddress().setHost("worker" + i).setRpcPort(1234))); } - when(fileSystemMaster.getWorkerInfoList()) + when(fsMaster.getWorkerInfoList()) .thenReturn(workerInfos.build()); List fileInfos = generateRandomFileInfo(2000, 50, 64 * Constants.MB); - when(fileSystemMaster.listStatus(any(), any())) + when(fsMaster.listStatus(any(), any())) .thenReturn(fileInfos); when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); @@ -342,31 +385,36 @@ public void testSchedulingFullCapacity() throws Exception { responseFuture.set(response.build()); return responseFuture; }); - - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); + FileIterable files = + new FileIterable(fsMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); for (int i = 0; i < 100; i++) { - LoadJob loadJob = new LoadJob("test" + i, "user", OptionalLong.of(1000)); - loadManager.submitLoad(loadJob); + LoadJob loadJob = new LoadJob("test" + i, "user", OptionalLong.of(1000), files); + scheduler.submitJob(loadJob); } - assertThrows(ResourceExhaustedRuntimeException.class, - () -> loadManager.submitLoad(new LoadJob("/way/too/many", "user", 
OptionalLong.empty()))); - loadManager.start(); - while (loadManager.getLoadJobs().values().stream() - .anyMatch(loadJob -> loadJob.getJobState() != LoadJobState.SUCCEEDED)) { + assertThrows(ResourceExhaustedRuntimeException.class, () -> scheduler.submitJob( + new LoadJob("/way/too/many", "user", OptionalLong.empty(), files))); + scheduler.start(); + while (scheduler + .getJobs().values().stream() + .anyMatch(loadJob -> loadJob.getJobState() != JobState.SUCCEEDED)) { Thread.sleep(1000); } - loadManager.stop(); + scheduler.stop(); } @Test public void testSchedulingWithException() throws Exception { - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); + when(fsMaster.createJournalContext()).thenReturn(journalContext); CloseableResource blockWorkerClientResource = mock(CloseableResource.class); BlockWorkerClient blockWorkerClient = mock(BlockWorkerClient.class); - when(fileSystemMaster.getWorkerInfoList()) + when(fsMaster.getWorkerInfoList()) .thenReturn(ImmutableList.of( new WorkerInfo().setId(1).setAddress( new WorkerNetAddress().setHost("worker1").setRpcPort(1234)), @@ -375,16 +423,23 @@ public void testSchedulingWithException() throws Exception { when(fileSystemContext.acquireBlockWorkerClient(any())).thenReturn(blockWorkerClientResource); when(blockWorkerClientResource.get()).thenReturn(blockWorkerClient); List fileInfos = generateRandomFileInfo(100, 10, 64 * Constants.MB); - when(fileSystemMaster.listStatus(any(), any())) + when(fsMaster.listStatus(any(), any())) // Non-retryable exception, first load job should fail .thenThrow(AccessControlException.class) // Retryable exception, second load job should succeed .thenThrow(new ResourceExhaustedRuntimeException("test", true)) 
.thenReturn(fileInfos); - LoadManager loadManager = new LoadManager(fileSystemMaster, fileSystemContext); - loadManager.start(); - loadManager.submitLoad("test", OptionalLong.of(1000), false, false); - while (!loadManager.getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + scheduler.start(); + FileIterable files = + new FileIterable(fsMaster, "test", Optional.of("user"), false, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob job = new LoadJob("test", "user", OptionalLong.of(1000), files); + scheduler.submitJob(job); + while (!scheduler + .getJobProgress(job.getDescription(), JobProgressReportFormat.TEXT, false) .contains("FAILED")) { Thread.sleep(1000); } @@ -394,37 +449,93 @@ public void testSchedulingWithException() throws Exception { responseFuture.set(response.build()); return responseFuture; }); - loadManager.submitLoad("test", OptionalLong.of(1000), false, false); - while (!loadManager.getLoadProgress("test", LoadProgressReportFormat.TEXT, false) + job = new LoadJob("test", "user", OptionalLong.of(1000), files); + scheduler.submitJob(job); + while (!scheduler + .getJobProgress(job.getDescription(), JobProgressReportFormat.TEXT, false) .contains("SUCCEEDED")) { Thread.sleep(1000); } - loadManager.stop(); + scheduler.stop(); } @Test public void testJobRetention() throws Exception { Configuration.modifiableGlobal().set(PropertyKey.JOB_RETENTION_TIME, "0ms", Source.RUNTIME); - FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + FileSystemMaster fsMaster = mock(FileSystemMaster.class); FileSystemContext fileSystemContext = mock(FileSystemContext.class); JournalContext journalContext = mock(JournalContext.class); - when(fileSystemMaster.createJournalContext()).thenReturn(journalContext); - LoadManager loadManager = new LoadManager(fileSystemMaster, 
fileSystemContext); - loadManager.start(); - IntStream.range(0, 5).forEach( - i -> assertTrue(loadManager.submitLoad( - String.format("/load/%d", i), OptionalLong.empty(), false, true))); - assertEquals(5, loadManager.getLoadJobs().size()); - loadManager.getLoadJobs().get("/load/1").setJobState(LoadJobState.VERIFYING); - loadManager.getLoadJobs().get("/load/2").setJobState(LoadJobState.FAILED); - loadManager.getLoadJobs().get("/load/3").setJobState(LoadJobState.SUCCEEDED); - loadManager.getLoadJobs().get("/load/4").setJobState(LoadJobState.STOPPED); - loadManager.cleanupStaleJob(); - assertEquals(2, loadManager.getLoadJobs().size()); - assertTrue(loadManager.getLoadJobs().containsKey("/load/0")); - assertTrue(loadManager.getLoadJobs().containsKey("/load/1")); + when(fsMaster.createJournalContext()).thenReturn(journalContext); + DefaultWorkerProvider workerProvider = + new DefaultWorkerProvider(fsMaster, fileSystemContext); + Scheduler scheduler = new Scheduler(workerProvider, new JournaledJobMetaStore(fsMaster)); + scheduler.start(); + IntStream + .range(0, 5) + .forEach(i -> { + String path = String.format("/load/%d", i); + FileIterable files = new FileIterable(fsMaster, path, Optional.of("user"), + false, LoadJob.QUALIFIED_FILE_FILTER); + assertTrue(scheduler.submitJob( + new LoadJob(path, Optional.of("user"), "1", + OptionalLong.empty(), false, true, files))); + }); + assertEquals(5, scheduler + .getJobs().size()); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/1") + .setType("load") + .build()) + .setJobState(JobState.VERIFYING); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/2") + .setType("load") + .build()) + .setJobState(JobState.FAILED); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/3") + .setType("load") + .build()) + .setJobState(JobState.SUCCEEDED); + scheduler + .getJobs() + .get(JobDescription + .newBuilder() + .setPath("/load/4") + .setType("load") 
+ .build()) + .setJobState(JobState.STOPPED); + scheduler.cleanupStaleJob(); + assertEquals(2, scheduler + .getJobs().size()); + assertTrue(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/0") + .setType("load") + .build())); + assertTrue(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/1") + .setType("load") + .build())); IntStream.range(2, 5).forEach( - i -> assertFalse(loadManager.getLoadJobs().containsKey(String.format("/load/%d", i)))); + i -> assertFalse(scheduler + .getJobs().containsKey(JobDescription + .newBuilder() + .setPath("/load/" + i) + .setType("load") + .build()))); Configuration.modifiableGlobal().unset(PropertyKey.JOB_RETENTION_TIME); } } diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 3f8179352670..b982be78897f 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -602,47 +602,51 @@ message NeedsSyncRequest { message NeedsSyncResponse {} -message LoadPathPOptions { - optional int64 bandwidth = 1; - optional bool verify = 2; - optional bool partialListing = 3; +message SubmitJobPRequest{ + optional bytes request_body = 1; } -message LoadPathPRequest { - required string path = 1; - optional LoadPathPOptions options = 2; +message SubmitJobPResponse { + optional string jobId = 1; } -message LoadPathPResponse { - optional bool newLoadSubmitted = 1; +message LoadJobPOptions { + optional int64 bandwidth = 1; + optional bool verify = 2; + optional bool partialListing = 3; } -message StopLoadPathPRequest { - required string path = 1; +message StopJobPRequest { + required JobDescription jobDescription = 1; } -message StopLoadPathPResponse { - optional bool existingLoadStopped = 1; +message StopJobPResponse { + optional bool jobStopped = 1; } -enum LoadProgressReportFormat { +enum JobProgressReportFormat { 
TEXT = 1; JSON = 2; } -message LoadProgressPOptions { - optional LoadProgressReportFormat format = 1; +message JobDescription{ + required string type = 1; + optional string path = 2; +} + +message JobProgressPOptions { + optional JobProgressReportFormat format = 1; optional bool verbose = 2; } -message GetLoadProgressPRequest { - required string path = 1; - optional LoadProgressPOptions options = 2; +message GetJobProgressPRequest { + required JobDescription jobDescription = 1; + optional JobProgressPOptions options = 2; } -message GetLoadProgressPResponse { +message GetJobProgressPResponse { optional string progressReport = 1; - optional LoadProgressReportFormat format = 2; + optional JobProgressReportFormat format = 2; } /** @@ -794,11 +798,11 @@ service FileSystemMasterClientService { /** * Load a directory into Alluxio. */ - rpc LoadPath(LoadPathPRequest) returns (LoadPathPResponse); + rpc submitJob(SubmitJobPRequest) returns (SubmitJobPResponse); - rpc StopLoadPath(StopLoadPathPRequest) returns (StopLoadPathPResponse); + rpc StopJob(StopJobPRequest) returns (StopJobPResponse); - rpc GetLoadProgress(GetLoadProgressPRequest) returns (GetLoadProgressPResponse); + rpc GetJobProgress(GetJobProgressPRequest) returns (GetJobProgressPResponse); } message FileSystemHeartbeatPResponse { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 879b1f1a9af5..56f4a25c5854 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2085,7 +2085,7 @@ ] }, { - "name": "LoadProgressReportFormat", + "name": "JobProgressReportFormat", "enum_fields": [ { "name": "TEXT", @@ -3794,7 +3794,27 @@ "name": "NeedsSyncResponse" }, { - "name": "LoadPathPOptions", + "name": "SubmitJobPRequest", + "fields": [ + { + "id": 1, + "name": "request_body", + "type": "bytes" + } + ] + }, + { + "name": "SubmitJobPResponse", + "fields": [ + { + "id": 1, + "name": "jobId", + "type": "string" + } + ] + }, + { + 
"name": "LoadJobPOptions", "fields": [ { "id": 1, @@ -3814,57 +3834,47 @@ ] }, { - "name": "LoadPathPRequest", + "name": "StopJobPRequest", "fields": [ { "id": 1, - "name": "path", - "type": "string" - }, - { - "id": 2, - "name": "options", - "type": "LoadPathPOptions" + "name": "jobDescription", + "type": "JobDescription" } ] }, { - "name": "LoadPathPResponse", + "name": "StopJobPResponse", "fields": [ { "id": 1, - "name": "newLoadSubmitted", + "name": "jobStopped", "type": "bool" } ] }, { - "name": "StopLoadPathPRequest", + "name": "JobDescription", "fields": [ { "id": 1, - "name": "path", + "name": "type", "type": "string" - } - ] - }, - { - "name": "StopLoadPathPResponse", - "fields": [ + }, { - "id": 1, - "name": "existingLoadStopped", - "type": "bool" + "id": 2, + "name": "path", + "type": "string" } ] }, { - "name": "LoadProgressPOptions", + "name": "JobProgressPOptions", "fields": [ { "id": 1, "name": "format", - "type": "LoadProgressReportFormat" + "type": "JobProgressReportFormat" }, { "id": 2, @@ -3874,22 +3884,22 @@ ] }, { - "name": "GetLoadProgressPRequest", + "name": "GetJobProgressPRequest", "fields": [ { "id": 1, - "name": "path", - "type": "string" + "name": "jobDescription", + "type": "JobDescription" }, { "id": 2, "name": "options", - "type": "LoadProgressPOptions" + "type": "JobProgressPOptions" } ] }, { - "name": "GetLoadProgressPResponse", + "name": "GetJobProgressPResponse", "fields": [ { "id": 1, @@ -3899,7 +3909,7 @@ { "id": 2, "name": "format", - "type": "LoadProgressReportFormat" + "type": "JobProgressReportFormat" } ] }, @@ -4172,19 +4182,19 @@ "out_type": "NeedsSyncResponse" }, { - "name": "LoadPath", - "in_type": "LoadPathPRequest", - "out_type": "LoadPathPResponse" + "name": "submitJob", + "in_type": "SubmitJobPRequest", + "out_type": "SubmitJobPResponse" }, { - "name": "StopLoadPath", - "in_type": "StopLoadPathPRequest", - "out_type": "StopLoadPathPResponse" + "name": "StopJob", + "in_type": "StopJobPRequest", + "out_type": 
"StopJobPResponse" }, { - "name": "GetLoadProgress", - "in_type": "GetLoadProgressPRequest", - "out_type": "GetLoadProgressPResponse" + "name": "GetJobProgress", + "in_type": "GetJobProgressPRequest", + "out_type": "GetJobProgressPResponse" } ] }, diff --git a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java index b7bb97a4c34e..b07449ec7511 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java +++ b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java @@ -31,9 +31,9 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; -import alluxio.grpc.LoadProgressReportFormat; import alluxio.grpc.MountPOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.RenamePOptions; @@ -44,6 +44,8 @@ import alluxio.grpc.UnmountPOptions; import alluxio.jnifuse.FuseFileSystem; import alluxio.jnifuse.struct.FuseContext; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.wire.BlockLocationInfo; import alluxio.wire.FileInfo; @@ -298,19 +300,18 @@ public void needsSync(AlluxioURI path) { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { + public Optional submitJob(JobRequest jobRequest) { throw new UnsupportedOperationException(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { throw new UnsupportedOperationException(); } @Override - public String getLoadProgress(AlluxioURI path, - Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + 
JobProgressReportFormat format, boolean verbose) { throw new UnsupportedOperationException(); } diff --git a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java index 8eadd3b7636f..cceb7c7ee042 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java +++ b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java @@ -26,6 +26,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; import alluxio.grpc.MountPOptions; @@ -35,6 +36,8 @@ import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.UpdateUfsModePOptions; +import alluxio.job.JobDescription; +import alluxio.job.JobRequest; import alluxio.security.authorization.AclEntry; import alluxio.wire.MountPointInfo; import alluxio.wire.SyncPointInfo; @@ -45,6 +48,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Consumer; /** @@ -202,19 +206,18 @@ public void needsSync(AlluxioURI path) throws AlluxioStatusException { } @Override - public boolean submitLoad(AlluxioURI path, java.util.OptionalLong bandwidth, - boolean usePartialListing, boolean verify) { - return false; + public Optional submitJob(JobRequest job) { + return Optional.empty(); } @Override - public boolean stopLoad(AlluxioURI path) { + public boolean stopJob(JobDescription jobDescription) { return false; } @Override - public String getLoadProgress(AlluxioURI path, - java.util.Optional format, boolean verbose) { + public String getJobProgress(JobDescription jobDescription, + JobProgressReportFormat format, boolean verbose) { return null; } diff --git 
a/job/common/pom.xml b/job/common/pom.xml index 71c81a481448..f95b15e867ad 100644 --- a/job/common/pom.xml +++ b/job/common/pom.xml @@ -37,6 +37,12 @@ alluxio-core-common ${project.version} + + org.alluxio + alluxio-core-client-fs + ${project.version} + + diff --git a/job/common/src/main/java/alluxio/scheduler/job/Job.java b/job/common/src/main/java/alluxio/scheduler/job/Job.java new file mode 100644 index 000000000000..4b9f5fdd4483 --- /dev/null +++ b/job/common/src/main/java/alluxio/scheduler/job/Job.java @@ -0,0 +1,133 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.scheduler.job; + +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.job.JobDescription; +import alluxio.proto.journal.Journal; +import alluxio.wire.WorkerInfo; + +import java.util.Optional; +import java.util.OptionalLong; + +/** + * interface for job that can be scheduled by Alluxio scheduler. + * + * @param the type of the task of the job + */ +public interface Job> { + + /** + * @return the job description, which is used to identify the intention of the job. We don't allow + * multiple jobs with the same description to be run on the scheduler at the same time. When + * submitting a job when there is already a job with the same description, scheduler will update + * the job instead of submitting a new job. 
+ */ + JobDescription getDescription(); + + /** + * @return job end time if finished, otherwise empty + */ + OptionalLong getEndTime(); + + /** + * @return whether the job need verification + */ + boolean needVerification(); + + /** + * @return job state + */ + JobState getJobState(); + + /** + * set job state. + * @param state job state + */ + void setJobState(JobState state); + + /** + * @return job id. unique id for the job + */ + String getJobId(); + + /** + * set job as failure with exception. + * @param reason exception + */ + void failJob(AlluxioRuntimeException reason); + + /** + * Get job progress. + * @param format progress report format + * @param verbose whether to include detailed information + * @return job progress report + * @throws IllegalArgumentException if the format is not supported + */ + String getProgress(JobProgressReportFormat format, boolean verbose); + + /** + * Check whether the job is healthy. + * @return true if the job is healthy, false if not + */ + boolean isHealthy(); + + /** + * Check whether the job is still running. + * @return true if the job is running, false if not + */ + boolean isRunning(); + + /** + * Check whether the job is finished. + * @return true if the job is finished, false if not + */ + boolean isDone(); + + /** + * Check whether the current pass is finished. + * @return true if the current pass of job is finished, false if not + */ + boolean isCurrentPassDone(); + + /** + * Initiate a verification pass. This will re-list the directory and find + * any unfinished files / tasks and try to execute them again. + */ + void initiateVerification(); + + /** + * @param worker blocker to worker + * @return the next task to run. If there is no more task to run, return empty + * @throws AlluxioRuntimeException if any error occurs when getting next task + */ + Optional getNextTask(WorkerInfo worker); + + /** + * @return job journal entry + */ + Journal.JournalEntry toJournalEntry(); + + /** + * process task result. 
+ * @param task task containing result future + * @return success if successfully process task result, otherwise return failure + */ + boolean processResponse(T task); + + /** + * update job configs. + * @param job the job to update from. Must be the same job type + */ + void updateJob(Job job); +} diff --git a/job/common/src/main/java/alluxio/scheduler/job/JobFactory.java b/job/common/src/main/java/alluxio/scheduler/job/JobFactory.java new file mode 100644 index 000000000000..6105186203ac --- /dev/null +++ b/job/common/src/main/java/alluxio/scheduler/job/JobFactory.java @@ -0,0 +1,22 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.scheduler.job; + +/** + * Factory for creating job instances. + */ +public interface JobFactory { + /** + * @return the job + */ + Job create(); +} diff --git a/job/common/src/main/java/alluxio/scheduler/job/JobMetaStore.java b/job/common/src/main/java/alluxio/scheduler/job/JobMetaStore.java new file mode 100644 index 000000000000..4db669a4b3f3 --- /dev/null +++ b/job/common/src/main/java/alluxio/scheduler/job/JobMetaStore.java @@ -0,0 +1,31 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.scheduler.job; + +import java.util.Set; + +/** + * Job meta store that store job information. + */ +public interface JobMetaStore { + + /** + * Update existing job in the meta store with the new job. + * @param job the job used to update the existing job in the meta store + */ + void updateJob(Job job); + + /** + * @return all the jobs in the meta store + */ + Set> getJobs(); +} diff --git a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java b/job/common/src/main/java/alluxio/scheduler/job/JobState.java similarity index 77% rename from core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java rename to job/common/src/main/java/alluxio/scheduler/job/JobState.java index e7a45dbb001e..35ec2e96d54f 100644 --- a/core/server/master/src/main/java/alluxio/master/file/loadmanager/LoadJobState.java +++ b/job/common/src/main/java/alluxio/scheduler/job/JobState.java @@ -9,31 +9,31 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.master.file.loadmanager; +package alluxio.scheduler.job; import alluxio.proto.journal.Job; /** - * Load status. + * Job status. */ -public enum LoadJobState +public enum JobState { - LOADING, + RUNNING, VERIFYING, STOPPED, SUCCEEDED, FAILED; /** - * Convert LoadStatus to PJobStatus. + * Convert JobStatus to PJobStatus. 
* - * @param state load job state + * @param state job state * @return the corresponding PJobStatus */ - public static Job.PJobState toProto(LoadJobState state) + public static Job.PJobState toProto(JobState state) { switch (state) { - case LOADING: + case RUNNING: case VERIFYING: return Job.PJobState.CREATED; case STOPPED: @@ -48,16 +48,16 @@ public static Job.PJobState toProto(LoadJobState state) } /** - * Convert PJobStatus to LoadStatus. + * Convert PJobStatus to JobStatus. * * @param jobStatus protobuf job status - * @return the corresponding LoadStatus + * @return the corresponding JobStatus */ - public static LoadJobState fromProto(Job.PJobState jobStatus) + public static JobState fromProto(Job.PJobState jobStatus) { switch (jobStatus) { case CREATED: - return LOADING; + return RUNNING; case STOPPED: return STOPPED; case SUCCEEDED: diff --git a/job/common/src/main/java/alluxio/scheduler/job/Task.java b/job/common/src/main/java/alluxio/scheduler/job/Task.java new file mode 100644 index 000000000000..f3ce7f64c759 --- /dev/null +++ b/job/common/src/main/java/alluxio/scheduler/job/Task.java @@ -0,0 +1,46 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.scheduler.job; + +import alluxio.client.block.stream.BlockWorkerClient; + +import com.google.common.util.concurrent.ListenableFuture; + +/** + * A task that can be executed on a worker. Belongs to a {@link Job}. + * + * @param the response type of the task + */ +public abstract class Task { + + /** + * run the task. 
+ */ + protected abstract ListenableFuture run(BlockWorkerClient client); + + private ListenableFuture mResponseFuture; + + /** + * @return the response future + */ + public ListenableFuture getResponseFuture() { + return mResponseFuture; + } + + /** + * run the task and set the response future. + * @param client worker client + */ + public void execute(BlockWorkerClient client) { + mResponseFuture = run(client); + } +} diff --git a/job/common/src/main/java/alluxio/scheduler/job/WorkerProvider.java b/job/common/src/main/java/alluxio/scheduler/job/WorkerProvider.java new file mode 100644 index 000000000000..cf587462b720 --- /dev/null +++ b/job/common/src/main/java/alluxio/scheduler/job/WorkerProvider.java @@ -0,0 +1,43 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.scheduler.job; + +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.resource.CloseableResource; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import java.util.List; + +/** + * Interface for providing worker information and client. + */ +public interface WorkerProvider { + + /** + * Gets a list of worker information. + * + * @return a list of worker information + * @throws AlluxioRuntimeException if failed to get worker information + */ + List getWorkerInfos(); + + /** + * Gets a worker client. 
+ * + * @param address the worker address + * @return a worker client + * @throws AlluxioRuntimeException if failed to get worker client + */ + CloseableResource getWorkerClient(WorkerNetAddress address); +} diff --git a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java index 3b068bf40c9a..9ffcafa3c6d2 100644 --- a/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java +++ b/microbench/src/main/java/alluxio/fsmaster/FileSystemMasterBase.java @@ -91,7 +91,7 @@ void init() throws Exception { mFsMaster = new DefaultFileSystemMaster(blockMaster, masterContext, ExecutorServiceFactories.constantExecutorServiceFactory(service), Clock.systemUTC()); mFsMasterServer = - new FileSystemMasterClientServiceHandler(mFsMaster, mFsMaster.getLoadManager()); + new FileSystemMasterClientServiceHandler(mFsMaster, mFsMaster.getScheduler()); mGetStatusObserver = createStreamObserver(); mRegistry.add(FileSystemMaster.class, mFsMaster); diff --git a/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java b/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java index 96f6c0b9ffc5..64355679d13e 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/LoadCommand.java @@ -27,8 +27,11 @@ import alluxio.exception.AlluxioException; import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.CacheRequest; -import alluxio.grpc.LoadProgressReportFormat; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.grpc.LoadJobPOptions; import alluxio.grpc.OpenFilePOptions; +import alluxio.job.JobDescription; +import alluxio.job.LoadJobRequest; import alluxio.proto.dataserver.Protocol; import alluxio.resource.CloseableResource; import alluxio.util.FileSystemOptionsUtils; @@ -56,6 +59,8 @@ @ThreadSafe @PublicApi public final class LoadCommand extends AbstractFileSystemCommand { + private static final 
JobProgressReportFormat DEFAULT_FORMAT = JobProgressReportFormat.TEXT; + private static final String JOB_TYPE = "load"; private static final Option LOCAL_OPTION = Option.builder() .longOpt("local") @@ -105,7 +110,7 @@ public final class LoadCommand extends AbstractFileSystemCommand { .longOpt("bandwidth") .required(false) .hasArg(true) - .desc("Run verification when load finish and load new files if any.") + .desc("Single worker read bandwidth limit.") .build(); private static final Option PROGRESS_FORMAT = Option.builder() @@ -180,12 +185,10 @@ public int run(CommandLine cl) throws AlluxioException, IOException { if (cl.hasOption(STOP_OPTION.getLongOpt())) { return stopLoad(path); } - + JobProgressReportFormat format = DEFAULT_FORMAT; if (cl.hasOption(PROGRESS_OPTION.getLongOpt())) { - Optional format = Optional.empty(); if (cl.hasOption(PROGRESS_FORMAT.getLongOpt())) { - format = Optional.of(LoadProgressReportFormat.valueOf( - cl.getOptionValue(PROGRESS_FORMAT.getLongOpt()))); + format = JobProgressReportFormat.valueOf(cl.getOptionValue(PROGRESS_FORMAT.getLongOpt())); } return getProgress(path, format, cl.hasOption(PROGRESS_VERBOSE.getLongOpt())); } @@ -229,9 +232,16 @@ public void validateArgs(CommandLine cl) throws InvalidArgumentException { private int submitLoad(AlluxioURI path, OptionalLong bandwidth, boolean usePartialListing, boolean verify) { + LoadJobPOptions.Builder options = alluxio.grpc.LoadJobPOptions + .newBuilder().setPartialListing(usePartialListing).setVerify(verify); + if (bandwidth.isPresent()) { + options.setBandwidth(bandwidth.getAsLong()); + } + LoadJobRequest job = new LoadJobRequest(path.getPath(), options.build()); try { - if (mFileSystem.submitLoad(path, bandwidth, usePartialListing, verify)) { - System.out.printf("Load '%s' is successfully submitted.%n", path); + Optional jobId = mFileSystem.submitJob(job); + if (jobId.isPresent()) { + System.out.printf("Load '%s' is successfully submitted. 
JobId: %s%n", path, jobId.get()); } else { System.out.printf("Load already running for path '%s', updated the job with " + "new bandwidth: %s, verify: %s%n", @@ -248,7 +258,11 @@ private int submitLoad(AlluxioURI path, OptionalLong bandwidth, private int stopLoad(AlluxioURI path) { try { - if (mFileSystem.stopLoad(path)) { + if (mFileSystem.stopJob(JobDescription + .newBuilder() + .setPath(path.getPath()) + .setType(JOB_TYPE) + .build())) { System.out.printf("Load '%s' is successfully stopped.%n", path); } else { @@ -262,11 +276,15 @@ private int stopLoad(AlluxioURI path) { } } - private int getProgress(AlluxioURI path, Optional format, + private int getProgress(AlluxioURI path, JobProgressReportFormat format, boolean verbose) { try { System.out.println("Progress for loading path '" + path + "':"); - System.out.println(mFileSystem.getLoadProgress(path, format, verbose)); + System.out.println(mFileSystem.getJobProgress(JobDescription + .newBuilder() + .setPath(path.getPath()) + .setType(JOB_TYPE) + .build(), format, verbose)); return 0; } catch (StatusRuntimeException e) { if (e.getStatus().getCode() == Status.Code.NOT_FOUND) { diff --git a/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java index 77a1ec45824c..bd0c28feb830 100644 --- a/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fs/command/LoadCommandIntegrationTest.java @@ -67,7 +67,7 @@ public void testCommand() throws Exception { assertEquals(0, sFsShell.run("load", "/testRoot", "--progress")); Thread.sleep(1000); } - assertTrue(mOutput.toString().contains("Files Processed: 3 out of 3")); + assertTrue(mOutput.toString().contains("Files Processed: 3")); assertTrue(mOutput.toString().contains("Bytes Loaded: 3072.00KB out of 3072.00KB")); assertTrue(mOutput.toString().contains("Files Failed: 0")); assertEquals(0, 
sFsShell.run("load", "/testRoot", "--stop")); @@ -163,7 +163,7 @@ public void testAlreadyLoaded() throws Exception { assertEquals(0, sFsShell.run("load", "/testRootLoaded", "--progress")); Thread.sleep(1000); } - assertTrue(mOutput.toString().contains("Files Processed: 0 out of 0")); + assertTrue(mOutput.toString().contains("Files Processed: 0")); assertTrue(mOutput.toString().contains("Bytes Loaded: 0B out of 0B")); } } From ace82a43a75a5bb948b8f4c7e7e109a927a5bbd1 Mon Sep 17 00:00:00 2001 From: voddle Date: Fri, 10 Mar 2023 11:14:05 +0800 Subject: [PATCH 185/334] Add LoadMetadata and CreateTree operation to StressMasterBench ### What changes are proposed in this pull request? Two new operation 'LOAD_METADATA' and 'CREATE_TREE' were added to client type 'AlluxioNative' ### Why are the changes needed? Cause there are demands for using stress master bench test the performance of 'metadata_sync' with specified file tree. ### Does this PR introduce any user facing changes? 1. New parameters added to MasterBenchBaseParameters: a. 'LOAD_METADATA' and 'CREATE_TREE' available for '--operation' b. Four new parameters: '--tree-threads' '--tree-depth' '--tree-width' '--tree-files' are needed when using the operation: 'CREATE_TREE'. All workers will create a tree with the same structure, with subtrees. Each subtree is in depth of , except that each node at the bottom has children. Each node at the bottom contains files. Here, the files layer is not satiated by the of the tree. 
pr-link: Alluxio/alluxio#17042 change-id: cid-37dffef9de2d73727e6ee4110bd4438d5ed05c2f --- .../master/MasterBenchBaseParameters.java | 16 ++++++ .../java/alluxio/stress/master/Operation.java | 2 + .../alluxio/stress/cli/StressMasterBench.java | 29 ++++++++--- .../stress/cli/StressMasterBenchBase.java | 51 ++++++++++++++++++- 4 files changed, 91 insertions(+), 7 deletions(-) diff --git a/stress/common/src/main/java/alluxio/stress/master/MasterBenchBaseParameters.java b/stress/common/src/main/java/alluxio/stress/master/MasterBenchBaseParameters.java index 9f5915db069b..1f70c1beb5ae 100644 --- a/stress/common/src/main/java/alluxio/stress/master/MasterBenchBaseParameters.java +++ b/stress/common/src/main/java/alluxio/stress/master/MasterBenchBaseParameters.java @@ -29,6 +29,10 @@ public class MasterBenchBaseParameters extends FileSystemParameters { /** The stop count value that is invalid. */ public static final int STOP_COUNT_INVALID = -1; + public static final String TREE_THREADS = "--tree-threads"; + public static final String TREE_DEPTH = "--tree-depth"; + public static final String TREE_WIDTH = "--tree-width"; + public static final String TREE_FILES = "--tree-files"; @Parameter(names = {CLIENT_NUM_OPTION_NAME}, description = "the number of fs client instances to use") @@ -61,4 +65,16 @@ public class MasterBenchBaseParameters extends FileSystemParameters { @Parameter(names = {CREATE_FILE_SIZE_OPTION_NAME}, description = "The size of a file for the Create op, allowed to be 0. 
(0, 1m, 2k, 8k, etc.)") public String mCreateFileSize = "0"; + + @Parameter(names = {TREE_THREADS}, description = "The Tree Threads number") + public int mTreeThreads = 100; + + @Parameter(names = {TREE_DEPTH}, description = "The Tree Depth number") + public int mTreeDepth = 100; + + @Parameter(names = {TREE_WIDTH}, description = "The Tree Width number") + public int mTreeWidth = 100; + + @Parameter(names = {TREE_FILES}, description = "The Tree Files number") + public int mTreeFiles = 100; } diff --git a/stress/common/src/main/java/alluxio/stress/master/Operation.java b/stress/common/src/main/java/alluxio/stress/master/Operation.java index 20a5a3d9a753..0b413814803b 100644 --- a/stress/common/src/main/java/alluxio/stress/master/Operation.java +++ b/stress/common/src/main/java/alluxio/stress/master/Operation.java @@ -32,6 +32,8 @@ public enum Operation { // Create dirs CREATE_DIR("CreateDir"), // create fixed-N, create more in extra + CREATE_TREE("CreateTree"), // create fixed-N, create more in extra + LOAD_METADATA("LoadMetadata"), // create fixed-N, create more in extra ; private final String mName; diff --git a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java index c86e1c8fbc1e..b203f4cb9935 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java @@ -111,6 +111,9 @@ public void prepare() throws Exception { Path basePath; if (mParameters.mOperation == Operation.CREATE_DIR) { basePath = new Path(path, "dirs"); + } else if (mParameters.mOperation == Operation.CREATE_TREE + || mParameters.mOperation == Operation.LOAD_METADATA) { + basePath = new Path(path, extractHostName(mBaseParameters.mId)); } else { basePath = new Path(path, "files"); } @@ -123,6 +126,9 @@ public void prepare() throws Exception { long end = CommonUtils.getCurrentMs(); LOG.info("Cleanup took: {} s", (end - 
start) / 1000.0); prepareFs.mkdirs(basePath); + } else if (mParameters.mOperation == Operation.CREATE_TREE + || mParameters.mOperation == Operation.LOAD_METADATA) { + // Do nothing } else { // these are read operations. the directory must exist if (!prepareFs.exists(basePath)) { @@ -131,10 +137,13 @@ public void prepare() throws Exception { mParameters.mOperation)); } } - if (!prepareFs.getFileStatus(basePath).isDirectory()) { - throw new IllegalStateException(String - .format("base path (%s) must be a directory for operation (%s)", basePath, - mParameters.mOperation)); + if (mParameters.mOperation != Operation.CREATE_TREE + && mParameters.mOperation != Operation.LOAD_METADATA) { + if (!prepareFs.getFileStatus(basePath).isDirectory()) { + throw new IllegalStateException(String + .format("base path (%s) must be a directory for operation (%s)", basePath, + mParameters.mOperation)); + } } } @@ -150,7 +159,6 @@ public void prepare() throws Exception { hdfsConf.set(PropertyKey.Name.USER_FILE_WRITE_TYPE_DEFAULT, mParameters.mWriteType); - LOG.info("Using {} to perform the test.", mParameters.mClientType); if (mParameters.mClientType == FileSystemClientType.ALLUXIO_HDFS) { mCachedFs = new FileSystem[mParameters.mClients]; for (int i = 0; i < mCachedFs.length; i++) { @@ -258,10 +266,19 @@ private void runInternal() throws IOException, AlluxioException { if (Thread.currentThread().isInterrupted()) { break; } - if (!useStopCount && CommonUtils.getCurrentMs() >= mContext.getEndMs()) { + if (mParameters.mOperation != Operation.LOAD_METADATA + && mParameters.mOperation != Operation.CREATE_TREE + && !useStopCount && CommonUtils.getCurrentMs() >= mContext.getEndMs()) { break; } localCounter = mContext.getOperationCounter(0).getAndIncrement(); + if (mParameters.mOperation == Operation.CREATE_TREE && localCounter >= mTreeTotalCount) { + break; + } + if (mParameters.mOperation == Operation.LOAD_METADATA + && localCounter >= mParameters.mThreads) { + break; + } if (useStopCount && 
localCounter >= mParameters.mStopCount) { break; } diff --git a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java index c7766f3d584f..9eb0f711255e 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java @@ -21,6 +21,8 @@ import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadMetadataPType; import alluxio.grpc.PMode; import alluxio.grpc.SetAttributePOptions; import alluxio.stress.BaseParameters; @@ -76,6 +78,11 @@ public abstract class StressMasterBenchBase protected final String mFilesDir = "files"; protected final String mFixedDir = "fixed"; + // vars for createTestTree + protected int[] mPathRecord; + protected int[] mTreeLevelQuant; + protected int mTreeTotalCount; + /** * Creates instance. */ @@ -211,7 +218,6 @@ protected final class BenchContext { private final AtomicLong mTotalCounter; private final Path[] mBasePaths; private final Path[] mFixedBasePaths; - /** The results. Access must be synchronized for thread safety. 
*/ private T mResult; @@ -236,11 +242,23 @@ protected final class BenchContext { mBasePaths = new Path[operations.length]; mFixedBasePaths = new Path[operations.length]; + mPathRecord = new int[mParameters.mTreeDepth]; + mTreeLevelQuant = new int[mParameters.mTreeDepth]; + mTreeLevelQuant[mParameters.mTreeDepth - 1] = mParameters.mTreeWidth; + for (int i = mTreeLevelQuant.length - 2; i >= 0; i--) { + mTreeLevelQuant[i] = mTreeLevelQuant[i + 1] * mParameters.mTreeWidth; + } + mTreeTotalCount = mTreeLevelQuant[0] * mParameters.mTreeThreads; + for (int i = 0; i < operations.length; i++) { mOperationCounters[i] = new AtomicLong(); if (operations[i] == Operation.CREATE_DIR) { mBasePaths[i] = new Path(PathUtils.concatPath(basePaths[i], mDirsDir, mBaseParameters.mId)); + } else if (operations[i] == Operation.CREATE_TREE + || operations[i] == Operation.LOAD_METADATA) { + mBasePaths[i] = new Path(PathUtils.concatPath(basePaths[i], + extractHostName(mBaseParameters.mId))); } else { mBasePaths[i] = new Path(PathUtils.concatPath(basePaths[i], mFilesDir, mBaseParameters.mId)); @@ -461,8 +479,39 @@ protected void applyNativeOperation( throw new IOException("[INCONSISTENCY] file still exists after deletion"); } break; + case LOAD_METADATA: + fs.loadMetadata(new AlluxioURI(basePath + "/" + counter), ListStatusPOptions.newBuilder() + .setLoadMetadataType(LoadMetadataPType.ALWAYS) + .setRecursive(true) + .setLoadMetadataOnly(true).build()); + break; + case CREATE_TREE: + String p = ""; + int redundent = (int) counter; + for (int i = 0; i < mParameters.mTreeWidth; i++) { + mPathRecord[i] = redundent / mTreeLevelQuant[i]; + redundent = redundent % mTreeLevelQuant[i]; + p += "/"; + p += mPathRecord[i]; + } + for (int i = 0; i < mParameters.mTreeFiles; i++) { + fs.createFile(new AlluxioURI((basePath + p + "/" + redundent + "/" + i + ".txt")), + CreateFilePOptions.newBuilder().setRecursive(true).build()).close(); + } + break; default: throw new IllegalStateException("Unknown operation: " 
+ operation); } } + + protected String extractHostName(String mId) { + String hostName = ""; + String[] splitedMid = mId.split("-"); + hostName += splitedMid[0]; + for (int i = 1; i < splitedMid.length - 1; i++) { + hostName += "-"; + hostName += splitedMid[i]; + } + return hostName; + } } From cb8d7cabd7ea83b9561e234bd93dc44420dd7895 Mon Sep 17 00:00:00 2001 From: Yaolong Liu Date: Fri, 10 Mar 2023 11:15:22 +0800 Subject: [PATCH 186/334] Make getTimeoutMs of WaitForOptions return long type ### What changes are proposed in this pull request? Make WORKER_MASTER_CONNECT_RETRY_TIMEOUT use Integer.Max when the actual value exceeds Integer.Max. ### Why are the changes needed? In our alluxio cluster, `alluxio.worker.master.connect.retry.timeout` is set to 30day, which exceeds the upper limit of Integer, causing the worker fail to start: ``` 2023-02-27 17:17:24,353 ERROR SpecificMasterBlockSync - Fatal error: Failed to register with primary master java.util.concurrent.TimeoutException: Timed out waiting for alluxio.worker.block.BlockSyncMasterGroup@d8948cd to start options: WaitForOptions{interval=20, timeout=-1702967296} last value: false at alluxio.util.CommonUtils.waitForResult(CommonUtils.java:383) at alluxio.util.CommonUtils.waitFor(CommonUtils.java:341) at alluxio.worker.block.BlockSyncMasterGroup.waitForPrimaryMasterRegistrationComplete(BlockSyncMasterGroup.java:122) at alluxio.worker.block.AllMasterRegistrationBlockWorker.start(AllMasterRegistrationBlockWorker.java:69) at alluxio.worker.block.AllMasterRegistrationBlockWorker.start(AllMasterRegistrationBlockWorker.java:31) at alluxio.Registry.start(Registry.java:129) at alluxio.worker.AlluxioWorkerProcess.startWorkers(AlluxioWorkerProcess.java:274) at alluxio.worker.AlluxioWorkerProcess.start(AlluxioWorkerProcess.java:221) at alluxio.ProcessUtils.run(ProcessUtils.java:37) at alluxio.worker.AlluxioWorker.main(AlluxioWorker.java:78) ``` ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#16952 change-id: cid-d00b8df35393a8170367b7bbe5b5f46586956bcf --- core/common/src/main/java/alluxio/util/CommonUtils.java | 2 +- core/common/src/main/java/alluxio/util/WaitForOptions.java | 6 +++--- .../java/alluxio/worker/block/BlockSyncMasterGroup.java | 2 +- shell/src/main/java/alluxio/cli/fs/command/FreeCommand.java | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/common/src/main/java/alluxio/util/CommonUtils.java b/core/common/src/main/java/alluxio/util/CommonUtils.java index cbfde2c9b154..600ecc4d5188 100644 --- a/core/common/src/main/java/alluxio/util/CommonUtils.java +++ b/core/common/src/main/java/alluxio/util/CommonUtils.java @@ -376,7 +376,7 @@ public static T waitForResult(String description, Supplier objectSupplier T value; long start = getCurrentMs(); int interval = options.getInterval(); - int timeout = options.getTimeoutMs(); + long timeout = options.getTimeoutMs(); while (condition.apply(value = objectSupplier.get()) != true) { if (timeout != WaitForOptions.NEVER && getCurrentMs() - start > timeout) { throw new TimeoutException("Timed out waiting for " + description + " options: " + options diff --git a/core/common/src/main/java/alluxio/util/WaitForOptions.java b/core/common/src/main/java/alluxio/util/WaitForOptions.java index 04d9bf72b3cf..d69ad7909eb9 100644 --- a/core/common/src/main/java/alluxio/util/WaitForOptions.java +++ b/core/common/src/main/java/alluxio/util/WaitForOptions.java @@ -24,7 +24,7 @@ public final class WaitForOptions { /** How often to check for completion. */ private int mIntervalMs; /** How long to wait before giving up. 
*/ - private int mTimeoutMs; + private long mTimeoutMs; private WaitForOptions() {} @@ -45,7 +45,7 @@ public int getInterval() { /** * @return the timeout */ - public int getTimeoutMs() { + public long getTimeoutMs() { return mTimeoutMs; } @@ -62,7 +62,7 @@ public WaitForOptions setInterval(int intervalMs) { * @param timeoutMs the timeout to use (in milliseconds) * @return the updated options object */ - public WaitForOptions setTimeoutMs(int timeoutMs) { + public WaitForOptions setTimeoutMs(long timeoutMs) { mTimeoutMs = timeoutMs; return this; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java index 903cd8fb2215..6abc313fc1d1 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java @@ -121,7 +121,7 @@ public void waitForPrimaryMasterRegistrationComplete(InetSocketAddress primaryMa try { CommonUtils.waitFor(this + " to start", primaryMasterSync::isRegistered, - WaitForOptions.defaults().setTimeoutMs((int) WORKER_MASTER_CONNECT_RETRY_TIMEOUT)); + WaitForOptions.defaults().setTimeoutMs(WORKER_MASTER_CONNECT_RETRY_TIMEOUT)); } catch (InterruptedException e) { Thread.currentThread().interrupt(); LOG.warn("Exit the worker on interruption", e); diff --git a/shell/src/main/java/alluxio/cli/fs/command/FreeCommand.java b/shell/src/main/java/alluxio/cli/fs/command/FreeCommand.java index c5779fef732a..c320e1dd23c9 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FreeCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FreeCommand.java @@ -94,7 +94,7 @@ protected void runPlainPath(AlluxioURI path, CommandLine cl) Throwables.propagateIfPossible(e); throw new RuntimeException(e); } - }, WaitForOptions.defaults().setTimeoutMs(10 * Math.toIntExact(interval)) + }, WaitForOptions.defaults().setTimeoutMs(10L * 
Math.toIntExact(interval)) .setInterval(interval)); } catch (InterruptedException e) { Thread.currentThread().interrupt(); From 731385377b151b1c995c2d7f48562cd2b61805a9 Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Mon, 13 Mar 2023 15:21:01 -0700 Subject: [PATCH 187/334] Fix setTtl workflow and ttlbucket bugs ### What changes are proposed in this pull request? 1)setttl cmd on a big directory unnecessary setattr recursively, while dir ttl already supercedes children ttl. there's absolutely no reason to save ttl on children node individually. 2)ttlbucket unnecessarily saves Inode obj on heap unboundedly, where it could and should only save inodeid and load inode from inodestore to double check expiry during inodettlchecker processing. 3)fix race condition on ttlbucketlists from ttlchecker and insertion from foreground. 4)fix case where any expiration of inodes failed during ttlchecker it will never be retried again. ### Why are the changes needed? Reduce heavy GC when setttl on a huge folder / bug fixes as indicated above for ttl ### Does this PR introduce any user facing changes? yes. So now setTtl command on a dir will not set ttl attributes for all its children. 
pr-link: Alluxio/alluxio#16933 change-id: cid-36ec524b2178220886cccb7a82de119bdcc8203f --- .../alluxio/master/file/InodeTtlChecker.java | 43 +++++-- .../alluxio/master/file/meta/TtlBucket.java | 57 ++++++--- .../master/file/meta/TtlBucketList.java | 112 ++++++++++-------- .../master/file/meta/TtlBucketListTest.java | 52 ++++---- .../master/file/meta/TtlBucketTest.java | 38 +++--- .../fs/command/FileSystemCommandUtils.java | 2 +- .../alluxio/client/fs/TtlIntegrationTest.java | 54 +++++++++ ...FileSystemMasterSetTtlIntegrationTest.java | 91 ++++++++++++++ 8 files changed, 332 insertions(+), 117 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java index e3c6978627af..0c9cf4a76ab4 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java @@ -30,10 +30,13 @@ import alluxio.master.journal.JournalContext; import alluxio.master.journal.NoopJournalContext; import alluxio.proto.journal.File.UpdateInodeEntry; +import alluxio.util.ThreadUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import javax.annotation.concurrent.NotThreadSafe; @@ -59,28 +62,42 @@ public InodeTtlChecker(FileSystemMaster fileSystemMaster, InodeTree inodeTree) { @Override public void heartbeat() throws InterruptedException { - Set expiredBuckets = mTtlBuckets.getExpiredBuckets(System.currentTimeMillis()); + Set expiredBuckets = mTtlBuckets.pollExpiredBuckets(System.currentTimeMillis()); + Map failedInodesToRetryNum = new HashMap<>(); for (TtlBucket bucket : expiredBuckets) { - for (Inode inode : bucket.getInodes()) { + for (Map.Entry inodeExpiryEntry : bucket.getInodeExpiries()) { // Throw if interrupted. 
if (Thread.interrupted()) { throw new InterruptedException("InodeTtlChecker interrupted."); } + long inodeId = inodeExpiryEntry.getKey(); + int leftRetries = inodeExpiryEntry.getValue(); + // Exhausted retry attempt to expire this inode, bail. + if (leftRetries <= 0) { + continue; + } AlluxioURI path = null; try (LockedInodePath inodePath = mInodeTree.lockFullInodePath( - inode.getId(), LockPattern.READ, NoopJournalContext.INSTANCE) + inodeId, LockPattern.READ, NoopJournalContext.INSTANCE) ) { path = inodePath.getUri(); } catch (FileDoesNotExistException e) { // The inode has already been deleted, nothing needs to be done. continue; } catch (Exception e) { - LOG.error("Exception trying to clean up {} for ttl check: {}", inode.toString(), - e.toString()); + LOG.error("Exception trying to clean up inode:{},path:{} for ttl check: {}", inodeId, + path, e.toString()); } if (path != null) { + Inode inode = null; try { + inode = mTtlBuckets.loadInode(inodeId); + // Check again if this inode is indeed expired. + if (inode == null || inode.getTtl() == Constants.NO_TTL + || inode.getCreationTimeMs() + inode.getTtl() > System.currentTimeMillis()) { + continue; + } TtlAction ttlAction = inode.getTtlAction(); LOG.info("Path {} TTL has expired, performing action {}", path.getPath(), ttlAction); switch (ttlAction) { @@ -102,7 +119,6 @@ public void heartbeat() throws InterruptedException { .setTtlAction(ProtobufUtils.toProtobuf(TtlAction.DELETE)) .build()); } - mTtlBuckets.remove(inode); break; case DELETE: // public delete method will lock the path, and check WRITE permission required at @@ -131,12 +147,23 @@ public void heartbeat() throws InterruptedException { LOG.error("Unknown ttl action {}", ttlAction); } } catch (Exception e) { - LOG.error("Exception trying to clean up {} for ttl check", inode, e); + boolean retryExhausted = --leftRetries <= 0; + if (retryExhausted) { + LOG.error("Retry exhausted to clean up {} for ttl check. 
{}", + path, ThreadUtils.formatStackTrace(e)); + } else if (inode != null) { + failedInodesToRetryNum.put(inode, leftRetries); + } } } } } - mTtlBuckets.removeBuckets(expiredBuckets); + // Put back those failed-to-expire inodes for next round retry. + if (!failedInodesToRetryNum.isEmpty()) { + for (Map.Entry failedInodeEntry : failedInodesToRetryNum.entrySet()) { + mTtlBuckets.insert(failedInodeEntry.getKey(), failedInodeEntry.getValue()); + } + } } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java index ebfbe7d7ee79..fcb0659d6068 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucket.java @@ -17,6 +17,8 @@ import com.google.common.base.Objects; import java.util.Collection; +import java.util.Collections; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import javax.annotation.concurrent.ThreadSafe; @@ -34,16 +36,18 @@ public final class TtlBucket implements Comparable { */ private static long sTtlIntervalMs = Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS); + public static final int DEFAULT_RETRY_ATTEMPTS = 5; /** * Each bucket has a time to live interval, this value is the start of the interval, interval * value is the same as the configuration of {@link PropertyKey#MASTER_TTL_CHECKER_INTERVAL_MS}. */ private final long mTtlIntervalStartTimeMs; /** - * A collection of inodes whose ttl value is in the range of this bucket's interval. The mapping - * is from inode id to inode. + * A collection containing those inodes whose ttl value is + * in the range of this bucket's interval. The mapping + * is from inode id to the number of left retry to process. */ - private final ConcurrentHashMap mInodes; + private final ConcurrentHashMap mInodeToRetryMap; /** * Creates a new instance of {@link TtlBucket}. 
@@ -52,7 +56,7 @@ public final class TtlBucket implements Comparable { */ public TtlBucket(long startTimeMs) { mTtlIntervalStartTimeMs = startTimeMs; - mInodes = new ConcurrentHashMap<>(); + mInodeToRetryMap = new ConcurrentHashMap<>(); } /** @@ -78,38 +82,57 @@ public static long getTtlIntervalMs() { } /** - * @return the set of all inodes in the bucket backed by the internal set, changes made to the - * returned set will be shown in the internal set, and vice versa + * @return an unmodifiable view of all inodes ids in the bucket */ - public Collection getInodes() { - return mInodes.values(); + public Collection getInodeIds() { + return Collections.unmodifiableSet(mInodeToRetryMap.keySet()); } /** - * Adds a inode to the bucket. + * Get collection of inode to its left ttl process retry attempts. + * @return collection of inode to its left ttl process retry attempts + */ + public Collection> getInodeExpiries() { + return Collections.unmodifiableSet(mInodeToRetryMap.entrySet()); + } + + /** + * Adds an inode with default num of retry attempt to expire. + * @param inode + */ + public void addInode(Inode inode) { + addInode(inode, DEFAULT_RETRY_ATTEMPTS); + } + + /** + * Adds an inode to the bucket with a specific left retry number. * * @param inode the inode to be added - * @return true if a new inode was added to the bucket + * @param numOfRetry num of retries left when added to the ttlbucket */ - public boolean addInode(Inode inode) { - return mInodes.put(inode.getId(), inode) == null; + public void addInode(Inode inode, int numOfRetry) { + mInodeToRetryMap.compute(inode.getId(), (k, v) -> { + if (v != null) { + return Math.min(v, numOfRetry); + } + return numOfRetry; + }); } /** - * Removes a inode from the bucket. + * Removes an inode from the bucket. 
* * @param inode the inode to be removed - * @return true if a inode was removed */ - public boolean removeInode(InodeView inode) { - return mInodes.remove(inode.getId()) != null; + public void removeInode(InodeView inode) { + mInodeToRetryMap.remove(inode.getId()); } /** * @return the number of inodes in the bucket */ public int size() { - return mInodes.size(); + return mInodeToRetryMap.size(); } /** diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java index e68b293b08b0..a160a2d488dc 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/TtlBucketList.java @@ -26,10 +26,10 @@ import java.io.EOFException; import java.io.IOException; import java.io.OutputStream; +import java.util.HashSet; import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentSkipListSet; -import java.util.concurrent.atomic.AtomicLong; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -49,7 +49,6 @@ public final class TtlBucketList implements Checkpointed { */ private final ConcurrentSkipListSet mBucketList; private final ReadOnlyInodeStore mInodeStore; - private final AtomicLong mNumInodes = new AtomicLong(); /** * Creates a new list of {@link TtlBucket}s. @@ -61,6 +60,15 @@ public TtlBucketList(ReadOnlyInodeStore inodeStore) { mBucketList = new ConcurrentSkipListSet<>(); } + /** + * Load inode from inode store on processing the provided inode id. 
+ * @param inodeId + * @return Inode + */ + public Inode loadInode(long inodeId) { + return mInodeStore.get(inodeId).orElseGet(null); + } + /** * @return the number of TTL buckets */ @@ -72,7 +80,7 @@ public int getNumBuckets() { * @return the total number of inodes in all the buckets */ public long getNumInodes() { - return mNumInodes.get(); + return mBucketList.stream().mapToInt((bucket) -> bucket.size()).sum(); } /** @@ -104,6 +112,14 @@ private TtlBucket getBucketContaining(InodeView inode) { return bucket; } + /** + * Insert inode to the ttlbucket with default number of retry attempts. + * @param inode + */ + public void insert(Inode inode) { + insert(inode, TtlBucket.DEFAULT_RETRY_ATTEMPTS); + } + /** * Inserts an inode to the appropriate bucket where its ttl end time lies in the * bucket's interval, if no appropriate bucket exists, a new bucket will be created to contain @@ -111,8 +127,9 @@ private TtlBucket getBucketContaining(InodeView inode) { * buckets and nothing will happen. * * @param inode the inode to be inserted + * @param numOfRetry number of retries left to process this inode */ - public void insert(Inode inode) { + public void insert(Inode inode, int numOfRetry) { if (inode.getTtl() == Constants.NO_TTL) { return; } @@ -120,72 +137,67 @@ public void insert(Inode inode) { TtlBucket bucket; while (true) { bucket = getBucketContaining(inode); - if (bucket != null) { - break; + if (bucket == null) { + long ttlEndTimeMs = inode.getCreationTimeMs() + inode.getTtl(); + // No bucket contains the inode, so a new bucket should be added with an appropriate + // interval start. Assume the list of buckets have continuous intervals, and the + // first interval starts at 0, then ttlEndTimeMs should be in number + // (ttlEndTimeMs / interval) interval, so the start time of this interval should be + // (ttlEndTimeMs / interval) * interval. + long interval = TtlBucket.getTtlIntervalMs(); + bucket = new TtlBucket(interval == 0 ? 
ttlEndTimeMs : ttlEndTimeMs / interval * interval); + if (!mBucketList.add(bucket)) { + // If we reach here, it means the same bucket has been concurrently inserted by another + // thread, try again. + continue; + } } - long ttlEndTimeMs = inode.getCreationTimeMs() + inode.getTtl(); - // No bucket contains the inode, so a new bucket should be added with an appropriate interval - // start. Assume the list of buckets have continuous intervals, and the first interval starts - // at 0, then ttlEndTimeMs should be in number (ttlEndTimeMs / interval) interval, so the - // start time of this interval should be (ttlEndTimeMs / interval) * interval. - long interval = TtlBucket.getTtlIntervalMs(); - bucket = new TtlBucket(interval == 0 ? ttlEndTimeMs : ttlEndTimeMs / interval * interval); - if (mBucketList.add(bucket)) { + bucket.addInode(inode, numOfRetry); + /* if we added to the bucket but it got concurrently polled by InodeTtlChecker, + we're not sure this newly-added inode will be processed by the checker, + so we need to try insert again. */ + if (mBucketList.contains(bucket)) { break; } - // If we reach here, it means the same bucket has been concurrently inserted by another - // thread. - } - // TODO(zhouyufa): Consider the concurrent situation that the bucket is expired and processed by - // the InodeTtlChecker, then adding the inode into the bucket is meaningless since the bucket - // will not be accessed again. (c.f. ALLUXIO-2821) - if (bucket.addInode(inode)) { - mNumInodes.incrementAndGet(); } } /** - * Removes a inode from the bucket containing it if the inode is in one of the buckets, otherwise, - * do nothing. + * Removes an inode from the bucket containing it if the inode is in one + * of the buckets, otherwise, do nothing. * *

* Assume that no inode in the buckets has ttl value that equals {@link Constants#NO_TTL}. - * If a inode with valid ttl value is inserted to the buckets and its ttl value is going to be set - * to {@link Constants#NO_TTL} later, be sure to remove the inode from the buckets first. + * If an inode with valid ttl value is inserted to the buckets and its ttl value is + * going to be set to {@link Constants#NO_TTL} later, be sure to remove the inode + * from the buckets first. * * @param inode the inode to be removed */ public void remove(InodeView inode) { TtlBucket bucket = getBucketContaining(inode); if (bucket != null) { - if (bucket.removeInode(inode)) { - mNumInodes.decrementAndGet(); - } + bucket.removeInode(inode); } } /** - * Retrieves buckets whose ttl interval has expired before the specified time, that is, the + * Polls buckets whose ttl interval has expired before the specified time, that is, the * bucket's interval start time should be less than or equal to (specified time - ttl interval). - * The returned set is backed by the internal set. - * + * if concurrently there are new inodes added to those polled buckets, we check if after the + * moment it got added and the bucket got polled out, we're not sure if InodeTtlChecker will + * process it sa part of this batch, it will create a new bucket and added there to retry. + * Check {@link TtlBucketList#insert(Inode)} * @param time the expiration time * @return a set of expired buckets or an empty set if no buckets have expired */ - public Set getExpiredBuckets(long time) { - return mBucketList.headSet(new TtlBucket(time - TtlBucket.getTtlIntervalMs()), true); - } - - /** - * Removes all buckets in the set. 
- * - * @param buckets a set of buckets to be removed - */ - public void removeBuckets(Set buckets) { - mBucketList.removeAll(buckets); - for (TtlBucket nxt : buckets) { - mNumInodes.addAndGet(-nxt.size()); + public Set pollExpiredBuckets(long time) { + Set expiredBuckets = new HashSet<>(); + TtlBucket upperBound = new TtlBucket(time - TtlBucket.getTtlIntervalMs()); + while (!mBucketList.isEmpty() && mBucketList.first().compareTo(upperBound) <= 0) { + expiredBuckets.add(mBucketList.pollFirst()); } + return expiredBuckets; } @Override @@ -193,12 +205,17 @@ public CheckpointName getCheckpointName() { return CheckpointName.TTL_BUCKET_LIST; } + /* + Checkpointing a snapshot of the current inodes in ttlbucketlist. It's ok we checkpointed + some inodes that have already been processed during the process as the expiry of inode + will be double-checked at time of processing in InodeTtlChecker. + */ @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { CheckpointOutputStream cos = new CheckpointOutputStream(output, CheckpointType.LONGS); for (TtlBucket bucket : mBucketList) { - for (Inode inode : bucket.getInodes()) { - cos.writeLong(inode.getId()); + for (long inodeId : bucket.getInodeIds()) { + cos.writeLong(inodeId); } } } @@ -206,7 +223,6 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { mBucketList.clear(); - mNumInodes.set(0); Preconditions.checkState(input.getType() == CheckpointType.LONGS, "Unexpected checkpoint type: %s", input.getType()); while (true) { diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java index 2570719a13a3..2959df2fddb5 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java +++ 
b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketListTest.java @@ -16,7 +16,6 @@ import alluxio.master.metastore.InodeStore; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.junit.Assert; import org.junit.Before; import org.junit.ClassRule; @@ -24,6 +23,7 @@ import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** * Unit tests for {@link TtlBucketList}. @@ -54,8 +54,8 @@ public void before() { mBucketList = new TtlBucketList(mock(InodeStore.class)); } - private List getSortedExpiredBuckets(long expireTime) { - List buckets = Lists.newArrayList(mBucketList.getExpiredBuckets(expireTime)); + private List pollSortedExpiredBuckets(long expireTime) { + List buckets = Lists.newArrayList(mBucketList.pollExpiredBuckets(expireTime)); Collections.sort(buckets); return buckets; } @@ -63,8 +63,10 @@ private List getSortedExpiredBuckets(long expireTime) { private void assertExpired(List expiredBuckets, int bucketIndex, Inode... inodes) { TtlBucket bucket = expiredBuckets.get(bucketIndex); - Assert.assertEquals(inodes.length, bucket.getInodes().size()); - Assert.assertTrue(bucket.getInodes().containsAll(Lists.newArrayList(inodes))); + Assert.assertEquals(inodes.length, bucket.size()); + List inodeIds = Lists.newArrayList(inodes).stream().map(Inode::getId) + .collect(Collectors.toList()); + Assert.assertTrue(bucket.getInodeIds().containsAll(inodeIds)); } /** @@ -73,24 +75,27 @@ private void assertExpired(List expiredBuckets, int bucketIndex, @Test public void insert() { // No bucket should expire. - List expired = getSortedExpiredBuckets(BUCKET1_START); + List expired = pollSortedExpiredBuckets(BUCKET1_START); Assert.assertTrue(expired.isEmpty()); mBucketList.insert(BUCKET1_FILE1); // The first bucket should expire. 
- expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); assertExpired(expired, 0, BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE1); mBucketList.insert(BUCKET1_FILE2); // Only the first bucket should expire. for (long end = BUCKET2_START; end < BUCKET2_END; end++) { - expired = getSortedExpiredBuckets(end); + expired = pollSortedExpiredBuckets(end); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE2); } mBucketList.insert(BUCKET2_FILE); // All buckets should expire. - expired = getSortedExpiredBuckets(BUCKET2_END); + expired = pollSortedExpiredBuckets(BUCKET2_END); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); assertExpired(expired, 1, BUCKET2_FILE); } @@ -104,39 +109,28 @@ public void remove() { mBucketList.insert(BUCKET1_FILE2); mBucketList.insert(BUCKET2_FILE); - List expired = getSortedExpiredBuckets(BUCKET1_END); + List expired = pollSortedExpiredBuckets(BUCKET1_END); assertExpired(expired, 0, BUCKET1_FILE1, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE1); + mBucketList.insert(BUCKET1_FILE2); mBucketList.remove(BUCKET1_FILE1); - expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); // Only the first bucket should expire, and there should be only one BUCKET1_FILE2 in it. assertExpired(expired, 0, BUCKET1_FILE2); + mBucketList.insert(BUCKET1_FILE2); mBucketList.remove(BUCKET1_FILE2); - expired = getSortedExpiredBuckets(BUCKET1_END); + expired = pollSortedExpiredBuckets(BUCKET1_END); // Only the first bucket should expire, and there should be no files in it. assertExpired(expired, 0); // nothing in bucket 0. - expired = getSortedExpiredBuckets(BUCKET2_END); - // All buckets should expire. - assertExpired(expired, 0); // nothing in bucket 0. - assertExpired(expired, 1, BUCKET2_FILE); - - // Remove bucket 0. 
- expired = getSortedExpiredBuckets(BUCKET1_END); - mBucketList.removeBuckets(Sets.newHashSet(expired)); - - expired = getSortedExpiredBuckets(BUCKET2_END); - // The only remaining bucket is bucket 1, it should expire. + expired = pollSortedExpiredBuckets(BUCKET2_END); + // Current bucket should expire. assertExpired(expired, 0, BUCKET2_FILE); - mBucketList.remove(BUCKET2_FILE); - expired = getSortedExpiredBuckets(BUCKET2_END); - assertExpired(expired, 0); // nothing in bucket. - - mBucketList.removeBuckets(Sets.newHashSet(expired)); // No bucket should exist now. - expired = getSortedExpiredBuckets(BUCKET2_END); + expired = pollSortedExpiredBuckets(BUCKET2_END); Assert.assertEquals(0, expired.size()); } } diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java index 29b6a5081fe3..0a035918d4ce 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/TtlBucketTest.java @@ -74,25 +74,35 @@ public void compareIntervalStartTime() { public void addAndRemoveInodeFile() { Inode fileTtl1 = TtlTestUtils.createFileWithIdAndTtl(0, 1); Inode fileTtl2 = TtlTestUtils.createFileWithIdAndTtl(1, 2); - Assert.assertTrue(mBucket.getInodes().isEmpty()); + Assert.assertTrue(mBucket.getInodeIds().isEmpty()); mBucket.addInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // The same file, won't be added. mBucket.addInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // Different file, will be added. 
mBucket.addInode(fileTtl2); - Assert.assertEquals(2, mBucket.getInodes().size()); + Assert.assertEquals(2, mBucket.size()); // Remove files; mBucket.removeInode(fileTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); - Assert.assertTrue(mBucket.getInodes().contains(fileTtl2)); + Assert.assertEquals(1, mBucket.size()); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl2.getId())); mBucket.removeInode(fileTtl2); - Assert.assertEquals(0, mBucket.getInodes().size()); + Assert.assertEquals(0, mBucket.size()); + + // Retry attempts; + mBucket.addInode(fileTtl1); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl1.getId())); + int retryAttempt = mBucket.getInodeExpiries().iterator().next().getValue(); + Assert.assertEquals(retryAttempt, TtlBucket.DEFAULT_RETRY_ATTEMPTS); + mBucket.addInode(fileTtl1, 2); + Assert.assertTrue(mBucket.getInodeIds().contains(fileTtl1.getId())); + int newRetryAttempt = mBucket.getInodeExpiries().iterator().next().getValue(); + Assert.assertEquals(newRetryAttempt, 2); } /** @@ -103,25 +113,25 @@ public void addAndRemoveInodeFile() { public void addAndRemoveInodeDirectory() { Inode directoryTtl1 = TtlTestUtils.createDirectoryWithIdAndTtl(0, 1); Inode directoryTtl2 = TtlTestUtils.createDirectoryWithIdAndTtl(1, 2); - Assert.assertTrue(mBucket.getInodes().isEmpty()); + Assert.assertTrue(mBucket.getInodeIds().isEmpty()); mBucket.addInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // The same directory, won't be added. mBucket.addInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); + Assert.assertEquals(1, mBucket.size()); // Different directory, will be added. 
mBucket.addInode(directoryTtl2); - Assert.assertEquals(2, mBucket.getInodes().size()); + Assert.assertEquals(2, mBucket.size()); // Remove directorys; mBucket.removeInode(directoryTtl1); - Assert.assertEquals(1, mBucket.getInodes().size()); - Assert.assertTrue(mBucket.getInodes().contains(directoryTtl2)); + Assert.assertEquals(1, mBucket.size()); + Assert.assertTrue(mBucket.getInodeIds().contains(directoryTtl2.getId())); mBucket.removeInode(directoryTtl2); - Assert.assertEquals(0, mBucket.getInodes().size()); + Assert.assertEquals(0, mBucket.getInodeIds().size()); } /** diff --git a/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java b/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java index 6b9728190a96..4413340d1b47 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FileSystemCommandUtils.java @@ -45,7 +45,7 @@ private FileSystemCommandUtils() {} // prevent instantiation */ public static void setTtl(FileSystem fs, AlluxioURI path, long ttlMs, TtlAction ttlAction) throws AlluxioException, IOException { - SetAttributePOptions options = SetAttributePOptions.newBuilder().setRecursive(true) + SetAttributePOptions options = SetAttributePOptions.newBuilder().setRecursive(false) .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() .setTtl(ttlMs).setTtlAction(ttlAction).build()) .build(); diff --git a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java index b4cd881477d3..ec66859f78e5 100644 --- a/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/TtlIntegrationTest.java @@ -20,10 +20,13 @@ import alluxio.Constants; import alluxio.client.file.FileOutStream; import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; import alluxio.conf.PropertyKey; +import alluxio.grpc.CreateDirectoryPOptions; import 
alluxio.grpc.CreateFilePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.LoadMetadataPType; +import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.TtlAction; import alluxio.grpc.WritePType; import alluxio.heartbeat.HeartbeatContext; @@ -41,6 +44,7 @@ import java.io.File; import java.util.Arrays; +import java.util.Random; /** * Integration tests for handling file TTLs (times to live). @@ -198,4 +202,54 @@ public void expireManyAfterDelete() throws Exception { } } } + + /** + * Tests that ttl on a directory will be enforced on all its children regarless + * of their ttl. + * @throws Exception + */ + @Test + public void expireADirectory() throws Exception { + int numFiles = 100; + AlluxioURI[] files = new AlluxioURI[numFiles]; + String directoryName = "dir1"; + mFileSystem.createDirectory(new AlluxioURI("/" + directoryName), + CreateDirectoryPOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH).build()); + String fileNamePrefix = "fileDelete"; + for (int i = 0; i < numFiles; i++) { + files[i] = new AlluxioURI("/" + directoryName + "/" + fileNamePrefix + i); + // Only the even-index files should expire. + long ttl = i % 2 == 0 ? TTL_INTERVAL_MS * 2000 : TTL_INTERVAL_MS * 1000; + mOutStream = mFileSystem.createFile(files[i], + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setTtl(ttl) + .setTtlAction(TtlAction.DELETE)).build()); + mOutStream.write(mBuffer, 0, 10); + mOutStream.close(); + } + // Set much smaller ttl on directory. + SetAttributePOptions setTTlOptions = SetAttributePOptions.newBuilder().setRecursive(false) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(TTL_INTERVAL_MS).setTtlAction(TtlAction.DELETE).build()) + .build(); + mFileSystem.setAttribute(new AlluxioURI("/" + directoryName), setTTlOptions); + // Individual children file's ttl should not be changed. 
+ Random random = new Random(); + int fileNum = random.nextInt(numFiles); + URIStatus anyFileStatus = mFileSystem.getStatus(new AlluxioURI("/" + directoryName + + "/" + fileNamePrefix + fileNum)); + assert (anyFileStatus.getFileInfo().getTtl() + == (fileNum % 2 == 0 ? TTL_INTERVAL_MS * 2000 : TTL_INTERVAL_MS * 1000)); + + CommonUtils.sleepMs(4 * TTL_INTERVAL_MS); + HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); + /* Even though children have longer ttl, but parents' ttl overrides all. + No Children should exist now. */ + for (int i = 0; i < numFiles; i++) { + assertFalse(mFileSystem.exists(files[i])); + String fileName = directoryName + "/" + fileNamePrefix + i; + // Check Ufs file existence + assertFalse(Arrays.stream(mUfs.list()).anyMatch(s -> s.equals(fileName))); + } + } } diff --git a/tests/src/test/java/alluxio/client/fs/concurrent/ConcurrentFileSystemMasterSetTtlIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/concurrent/ConcurrentFileSystemMasterSetTtlIntegrationTest.java index b1e791f61fcc..358a14fc926d 100644 --- a/tests/src/test/java/alluxio/client/fs/concurrent/ConcurrentFileSystemMasterSetTtlIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/concurrent/ConcurrentFileSystemMasterSetTtlIntegrationTest.java @@ -15,6 +15,7 @@ import alluxio.AuthenticatedUserRule; import alluxio.Constants; import alluxio.client.file.FileSystem; +import alluxio.client.file.URIStatus; import alluxio.collections.ConcurrentHashSet; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -173,4 +174,94 @@ private void assertErrorsSizeEquals(ConcurrentHashSet errors, int exp expected, errors.size()) + Joiner.on("\n").join(errors)); } } + + @Test + public void testConcurrentInsertAndExpire() throws Exception { + /* Test race condition when an inode is concurrently added to ttlbucket and + inodettlchecker has been processing this particular ttlbucket, the inode should + not be left out forever unprocessed in the future 
rounds. */ + // Create two files + String fileNamePrefix = "file"; + AlluxioURI fileUri1 = new AlluxioURI("/" + fileNamePrefix + "1"); + AlluxioURI fileUri2 = new AlluxioURI("/" + fileNamePrefix + "2"); + mFileSystem.createFile(fileUri1, + CreateFilePOptions.newBuilder().setWriteType(WritePType.MUST_CACHE).build()); + mFileSystem.createFile(fileUri2, + CreateFilePOptions.newBuilder().setWriteType(WritePType.MUST_CACHE).build()); + // Set ttl on file1. + SetAttributePOptions setTTlOptions = SetAttributePOptions.newBuilder().setRecursive(false) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(TTL_INTERVAL_MS).setTtlAction(TtlAction.DELETE).build()) + .build(); + mFileSystem.setAttribute((fileUri1), setTTlOptions); + + CommonUtils.sleepMs(4 * TTL_INTERVAL_MS); + // One thread to run InodeTtlChecker, file1 should be expired, another thread + // to set the ttl of file2 which with same ttl as file1, which is supposed to + // land in the bucket that's being processed by ttlchecker at the same time. 
+ final CyclicBarrier barrier = new CyclicBarrier(2); + List threads = new ArrayList<>(2); + final ConcurrentHashSet errors = new ConcurrentHashSet<>(); + Thread.UncaughtExceptionHandler exceptionHandler = new Thread.UncaughtExceptionHandler() { + public void uncaughtException(Thread th, Throwable ex) { + errors.add(ex); + } + }; + Thread ttlCheckerThread = new Thread(new Runnable() { + @Override + public void run() { + try { + AuthenticatedClientUser.set(TEST_USER); + barrier.await(); + HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + ttlCheckerThread.setUncaughtExceptionHandler(exceptionHandler); + threads.add(ttlCheckerThread); + + Thread setTtlFile2Thread = new Thread(new Runnable() { + @Override + public void run() { + try { + AuthenticatedClientUser.set(TEST_USER); + barrier.await(); + SetAttributePOptions setTTlOptions = SetAttributePOptions.newBuilder().setRecursive(false) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(TTL_INTERVAL_MS).setTtlAction(TtlAction.DELETE).build()) + .build(); + mFileSystem.setAttribute(fileUri2, setTTlOptions); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); + setTtlFile2Thread.setUncaughtExceptionHandler(exceptionHandler); + threads.add(setTtlFile2Thread); + Collections.shuffle(threads); + long startMs = CommonUtils.getCurrentMs(); + for (Thread t : threads) { + t.start(); + } + for (Thread t : threads) { + t.join(); + } + // Now file2 inode should either be in ttlbucket or it is cleaned up as part of + // the ttlchecker processing + List fileStatus = mFileSystem.listStatus(new AlluxioURI("/")); + Assert.assertTrue(String.format("file1:{} still exists and didn't get expired.", + fileUri1.getPath()), !fileStatus.stream().anyMatch( + status -> new AlluxioURI(status.getFileInfo().getPath()).equals(fileUri1))); + if (fileStatus.stream().anyMatch(status -> new 
AlluxioURI(status.getFileInfo().getPath()) + .equals(fileUri2))) { + // The inode is not being processed during concurrent insertion into ttlbucket + Assert.assertTrue(fileStatus.get(0).getFileInfo().getTtl() == TTL_INTERVAL_MS); + // Now run ttl checker again, it should be gone. + HeartbeatScheduler.execute(HeartbeatContext.MASTER_TTL_CHECK); + Assert.assertEquals("There are remaining file existing with expired TTLs", + 0, mFileSystem.listStatus(new AlluxioURI("/")).size()); + } + } } From dcad6b0712486a288a21a9ac415608ee036dabd4 Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Tue, 14 Mar 2023 12:59:58 -0700 Subject: [PATCH 188/334] Add test coverage on multipart upload ### What changes are proposed in this pull request? 1. add multipart upload tests for correct part ordering 2. add log4j configs to show details such as filename/linenumber/threadid for better tracking ### Why are the changes needed? improve test coverage on large multipart upload corruption ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17046 change-id: cid-8a719d4aecd351bae2019d0e30cc99f7a5c4ff13 --- .../client/rest/S3ClientRestApiTest.java | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java index 3c7a54a32e23..1ce63d6c5d4f 100644 --- a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java +++ b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java @@ -78,9 +78,11 @@ import java.net.HttpURLConnection; import java.security.MessageDigest; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import java.util.UUID; import javax.validation.constraints.NotNull; import javax.ws.rs.HttpMethod; @@ -1481,9 +1483,19 @@ public void abortMultipartUploadWithNonExistingUploadId() throws Exception { } @Test - public void completeMultipartUpload() throws Exception { - // Two temporary parts in the multipart upload, each part contains a random string, - // after completion, the object should contain the combination of the two strings. + public void completeMultipartUploadTest() throws Exception { + int numOfTestIter = 3; + int maxParts = 50; + int minParts = 15; + Random random = new Random(); + for (int i = 0; i < numOfTestIter; i++) { + completeMultipartUpload(random.nextInt(maxParts - minParts) + minParts); + } + } + + public void completeMultipartUpload(int partsNum) throws Exception { + // Random number of parts in the multipart upload, each part contains a random string, + // after completion, the object should contain the combination of these parts' content. final String bucketName = "bucket"; createBucketRestCall(bucketName); @@ -1498,25 +1510,32 @@ public void completeMultipartUpload() throws Exception { final String uploadId = multipartUploadResult.getUploadId(); // Upload parts. 
- String object1 = CommonUtils.randomAlphaNumString(DATA_SIZE); - String object2 = CommonUtils.randomAlphaNumString(DATA_SIZE); - createObject(objectKey, object1.getBytes(), uploadId, 1); - createObject(objectKey, object2.getBytes(), uploadId, 2); - + String[] objects = new String[partsNum]; + List parts = new ArrayList<>(); + for (int i = 0; i < partsNum; i++) { + parts.add(i + 1); + } + Collections.shuffle(parts); + for (int partNum : parts) { + int idx = partNum - 1; + objects[idx] = CommonUtils.randomAlphaNumString(DATA_SIZE); + createObject(objectKey, objects[idx].getBytes(), uploadId, partNum); + } // Verify that the two parts are uploaded to the temporary directory. AlluxioURI tmpDir = new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject( AlluxioURI.SEPARATOR + bucketName, objectName, uploadId)); - Assert.assertEquals(2, mFileSystem.listStatus(tmpDir).size()); + Assert.assertEquals(partsNum, mFileSystem.listStatus(tmpDir).size()); // Complete the multipart upload. List partList = new ArrayList<>(); - partList.add(new CompleteMultipartUploadRequest.Part("", 1)); - partList.add(new CompleteMultipartUploadRequest.Part("", 2)); + for (int i = 1; i <= partsNum; i++) { + partList.add(new CompleteMultipartUploadRequest.Part("", i)); + } result = completeMultipartUploadRestCall(objectKey, uploadId, new CompleteMultipartUploadRequest(partList)); // Verify that the response is expected. - String expectedCombinedObject = object1 + object2; + String expectedCombinedObject = String.join("", objects); MessageDigest md5 = MessageDigest.getInstance("MD5"); byte[] digest = md5.digest(expectedCombinedObject.getBytes()); String etag = Hex.encodeHexString(digest); From f8d3e48a6bb64f510256985baf94cdaaee57cd2a Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 15 Mar 2023 06:46:53 +0800 Subject: [PATCH 189/334] Remove System.out.println ### What changes are proposed in this pull request? Remove System.out.println. ### Why are the changes needed? 
This looks like leftover debug code that was forgotten and never deleted. pr-link: Alluxio/alluxio#17087 change-id: cid-c723d6aa7eb51311ef5962a891a26d192dd2c048 --- .../java/alluxio/proxy/s3/signature/StringToSignProducer.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java index d47b977a95b5..ca5459418832 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java @@ -175,7 +175,6 @@ public static String buildCanonicalRequest( String canonicalUri = getCanonicalUri("/", uri); String canonicalQueryStr = getQueryParamString(queryParams); - System.out.println(canonicalQueryStr); StringBuilder canonicalHeaders = new StringBuilder(); From 6bbe71358328a3e1557e9a22360db1891a6a4f61 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Tue, 14 Mar 2023 19:00:43 -0700 Subject: [PATCH 190/334] Fix metadata sync journalling when using UFS journal This fixes an issue caused by https://github.com/Alluxio/alluxio/pull/16529 That PR reduced the number of journal flushes during metadata sync when using the embedded journal, but it was not compatible with the UFS journal, because with the UFS journal each master has a different journal file. This PR disables that change when using the UFS journal. 
pr-link: Alluxio/alluxio#17032 change-id: cid-20836bc9b5d64aff37e6f6f6e6ef3ec5f0e73617 --- .../master/file/DefaultFileSystemMaster.java | 6 +- .../multi/process/PortCoordination.java | 1 + ...emMasterFaultToleranceIntegrationTest.java | 51 ++++++++++++++- ...dJournalIntegrationTestFaultTolerance.java | 62 +++++++++++++++++++ 4 files changed, 118 insertions(+), 2 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 4bfac61254d1..4f6153136a6f 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -118,6 +118,7 @@ import alluxio.master.journal.JournaledGroup; import alluxio.master.journal.NoopJournalContext; import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.master.journal.ufs.UfsJournalSystem; import alluxio.master.metastore.DelegatingReadOnlyInodeStore; import alluxio.master.metastore.InodeStore; import alluxio.master.metastore.ReadOnlyInodeStore; @@ -1834,7 +1835,10 @@ private void commitBlockInfosForFile(List blockIds, long fileLength, long long currLength = fileLength; for (long blockId : blockIds) { long currentBlockSize = Math.min(currLength, blockSize); - if (context != null) { + // if we are not using the UFS journal system, we can use the same journal context + // for the block info so that we do not have to create a new journal + // context and flush again + if (context != null && !(mJournalSystem instanceof UfsJournalSystem)) { mBlockMaster.commitBlockInUFS(blockId, currentBlockSize, context); } else { mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); diff --git a/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java b/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java index 10af58bc303b..6ff1d23ed78f 100644 --- 
a/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java +++ b/minicluster/src/main/java/alluxio/multi/process/PortCoordination.java @@ -39,6 +39,7 @@ public class PortCoordination { )); // for EmbeddedJournalIntegrationTestFaultTolerance public static final List EMBEDDED_JOURNAL_FAILOVER = allocate(3, 0); + public static final List EMBEDDED_JOURNAL_FAILOVER_METADATA_SYNC = allocate(3, 1); public static final List EMBEDDED_JOURNAL_SNAPSHOT_MASTER = allocate(3, 0); public static final List EMBEDDED_JOURNAL_SNAPSHOT_FOLLOWER = allocate(3, 0); public static final List EMBEDDED_JOURNAL_SNAPSHOT_TRANSFER_LOAD = allocate(3, 0); diff --git a/tests/src/test/java/alluxio/server/ft/FileSystemMasterFaultToleranceIntegrationTest.java b/tests/src/test/java/alluxio/server/ft/FileSystemMasterFaultToleranceIntegrationTest.java index 49305aa84c1a..30a5f2628a2f 100644 --- a/tests/src/test/java/alluxio/server/ft/FileSystemMasterFaultToleranceIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/ft/FileSystemMasterFaultToleranceIntegrationTest.java @@ -11,11 +11,13 @@ package alluxio.server.ft; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThrows; import alluxio.AlluxioURI; import alluxio.AuthenticatedUserRule; import alluxio.Constants; +import alluxio.client.file.FileSystem; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.FileAlreadyCompletedException; @@ -25,6 +27,7 @@ import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.MountPOptions; import alluxio.grpc.RenamePOptions; import alluxio.master.MultiMasterLocalAlluxioCluster; import alluxio.master.file.FileSystemMaster; @@ -35,19 +38,25 @@ import alluxio.master.file.contexts.RenameContext; import alluxio.testutils.BaseIntegrationTest; import alluxio.testutils.IntegrationTestUtils; +import alluxio.util.io.PathUtils; import 
alluxio.wire.FileInfo; import alluxio.wire.OperationId; import com.google.common.collect.ImmutableMap; +import org.apache.commons.io.IOUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import org.junit.rules.TestName; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import java.io.FileWriter; +import java.nio.charset.Charset; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Collection; import java.util.Map; @@ -60,6 +69,9 @@ public final class FileSystemMasterFaultToleranceIntegrationTest extends BaseInt private MultiMasterLocalAlluxioCluster mMultiMasterLocalAlluxioCluster; + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + @Rule public TestName mTestName = new TestName(); @@ -84,7 +96,7 @@ public static Collection data() { @Before public final void before() throws Exception { - mMultiMasterLocalAlluxioCluster = new MultiMasterLocalAlluxioCluster(2, 0); + mMultiMasterLocalAlluxioCluster = new MultiMasterLocalAlluxioCluster(2, 1); mMultiMasterLocalAlluxioCluster.initConfiguration( IntegrationTestUtils.getTestName(getClass().getSimpleName(), mTestName.getMethodName())); Configuration.set(PropertyKey.USER_RPC_RETRY_MAX_DURATION, "60sec"); @@ -105,6 +117,43 @@ public final void after() throws Exception { Configuration.reloadProperties(); } + @Test + public void syncMetadataUFSFailOver() throws Exception { + String ufsPath = mFolder.newFolder().getAbsoluteFile().toString(); + String ufsUri = "file://" + ufsPath; + MountPOptions options = MountPOptions.newBuilder().build(); + FileSystem client = mMultiMasterLocalAlluxioCluster.getClient(); + AlluxioURI mountPath = new AlluxioURI("/mnt1"); + client.mount(mountPath, new AlluxioURI(ufsUri), options); + + // create files outside alluxio + String fileName = "someFile"; + String contents = "contents"; + for (int i = 0; i < 100; i++) { + try 
(FileWriter fw = new FileWriter(Paths.get(PathUtils.concatPath( + ufsPath, fileName + i)).toString())) { + fw.write(contents + i); + } + } + for (int i = 0; i < 100; i++) { + // sync it with metadata sync + assertEquals(contents + i, IOUtils.toString(client.openFile( + mountPath.join(fileName + i)), Charset.defaultCharset())); + } + + // Promote standby to be a leader and reset test state. + mMultiMasterLocalAlluxioCluster.stopLeader(); + mMultiMasterLocalAlluxioCluster.waitForNewMaster(CLUSTER_WAIT_TIMEOUT_MS); + mMultiMasterLocalAlluxioCluster.waitForWorkersRegistered(CLUSTER_WAIT_TIMEOUT_MS); + + // read the files again + client = mMultiMasterLocalAlluxioCluster.getClient(); + for (int i = 0; i < 100; i++) { + assertEquals(contents + i, IOUtils.toString(client.openFile( + mountPath.join(fileName + i)), Charset.defaultCharset())); + } + } + @Test public void partitionTolerantCreateFile() throws Exception { // Create paths for the test. diff --git a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java index a004af9521c5..224484451047 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java @@ -21,6 +21,7 @@ import alluxio.conf.PropertyKey; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; +import alluxio.grpc.MountPOptions; import alluxio.master.journal.JournalType; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.master.journal.raft.RaftJournalUtils; @@ -28,17 +29,23 @@ import alluxio.multi.process.PortCoordination; import alluxio.util.CommonUtils; import alluxio.util.WaitForOptions; +import alluxio.util.io.PathUtils; import org.apache.commons.io.FileUtils; +import 
org.apache.commons.io.IOUtils; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.StorageImplUtils; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import java.io.File; +import java.io.FileWriter; +import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -56,6 +63,9 @@ public class EmbeddedJournalIntegrationTestFaultTolerance private static final int NUM_MASTERS = 3; private static final int NUM_WORKERS = 0; + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + @Test public void failover() throws Exception { mCluster = MultiProcessCluster.newBuilder(PortCoordination.EMBEDDED_JOURNAL_FAILOVER) @@ -77,6 +87,58 @@ public void failover() throws Exception { mCluster.notifySuccess(); } + @Test + public void syncMetadataEJFailOver() throws Exception { + mCluster = MultiProcessCluster.newBuilder( + PortCoordination.EMBEDDED_JOURNAL_FAILOVER_METADATA_SYNC) + .setClusterName("EmbeddedJournalFaultTolerance_syncMetadataFailOver") + .setNumMasters(NUM_MASTERS) + .setNumWorkers(1) + .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED) + .addProperty(PropertyKey.MASTER_JOURNAL_FLUSH_TIMEOUT_MS, "5min") + .addProperty(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, 1000) + .addProperty(PropertyKey.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX, "50KB") + .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "3s") + .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "6s") + .addProperty(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL, "5s") + .build(); + mCluster.start(); + mCluster.waitForAllNodesRegistered(30_000); + String ufsPath = 
mFolder.newFolder().getAbsoluteFile().toString(); + String ufsUri = "file://" + ufsPath; + MountPOptions options = MountPOptions.newBuilder().build(); + FileSystem client = mCluster.getFileSystemClient(); + AlluxioURI mountPath = new AlluxioURI("/mnt1"); + client.mount(mountPath, new AlluxioURI(ufsUri), options); + + // create files outside alluxio + String fileName = "someFile"; + String contents = "contents"; + for (int i = 0; i < 100; i++) { + try (FileWriter fw = new FileWriter(Paths.get( + PathUtils.concatPath(ufsPath, fileName + i)).toString())) { + fw.write(contents + i); + } + } + // sync then with metadata sync + for (int i = 0; i < 100; i++) { + assertEquals(contents + i, IOUtils.toString(client.openFile( + mountPath.join(fileName + i)), Charset.defaultCharset())); + } + + // restart the cluster + mCluster.stopMasters(); + mCluster.startMasters(); + mCluster.waitForAllNodesRegistered(30_000); + + // read the files again + client = mCluster.getFileSystemClient(); + for (int i = 0; i < 100; i++) { + assertEquals(contents + i, IOUtils.toString(client.openFile( + mountPath.join(fileName + i)), Charset.defaultCharset())); + } + } + @Test public void copySnapshotToMaster() throws Exception { mCluster = MultiProcessCluster.newBuilder(PortCoordination.EMBEDDED_JOURNAL_SNAPSHOT_MASTER) From 9ae71902499b005852563055ae5135e97833e274 Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 15 Mar 2023 13:41:09 +0800 Subject: [PATCH 191/334] Improve thread dump format to match jstack ### What changes are proposed in this pull request? In Alluxio, each component's WebUI `host:port/stacks` shows all thread stacks in the current component. This change makes the output format align with `jstack` command, so analyzer tools like https://fastthread.io/ can parse it. ### Why are the changes needed? See above ### Does this PR introduce any user facing changes? 
Users can now use [FastThread](https://fastthread.io/) to analyze the thread-dump output of the `stacks` endpoint pr-link: Alluxio/alluxio#17090 change-id: cid-484d9c50a04dfb50d93c46290152238b7a13e952 --- .../main/java/alluxio/util/ThreadUtils.java | 37 ++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/core/common/src/main/java/alluxio/util/ThreadUtils.java b/core/common/src/main/java/alluxio/util/ThreadUtils.java index 8600bf4d60ca..3d2a253881ab 100644 --- a/core/common/src/main/java/alluxio/util/ThreadUtils.java +++ b/core/common/src/main/java/alluxio/util/ThreadUtils.java @@ -120,40 +120,11 @@ private static String getTaskName(long id, String name) { * @param stream the stream to * @param title a string title for the stack trace */ - public static synchronized void printThreadInfo(PrintStream stream, - String title) { - final int STACK_DEPTH = 20; - boolean contention = THREAD_BEAN.isThreadContentionMonitoringEnabled(); - long[] threadIds = THREAD_BEAN.getAllThreadIds(); + public static synchronized void printThreadInfo(PrintStream stream, String title) { stream.println("Process Thread Dump: " + title); - stream.println(threadIds.length + " active threads"); - for (long tid : threadIds) { - ThreadInfo info = THREAD_BEAN.getThreadInfo(tid, STACK_DEPTH); - if (info == null) { - stream.println(" Inactive"); - continue; - } - stream.println("Thread " - + getTaskName(info.getThreadId(), info.getThreadName()) + ":"); - Thread.State state = info.getThreadState(); - stream.println(" State: " + state); - stream.println(" Blocked count: " + info.getBlockedCount()); - stream.println(" Waited count: " + info.getWaitedCount()); - if (contention) { - stream.println(" Blocked time: " + info.getBlockedTime()); - stream.println(" Waited time: " + info.getWaitedTime()); - } - if (state == Thread.State.WAITING) { - stream.println(" Waiting on " + info.getLockName()); - } else if (state == Thread.State.BLOCKED) { - stream.println(" Blocked on " + 
info.getLockName()); - stream.println(" Blocked by " - + getTaskName(info.getLockOwnerId(), info.getLockOwnerName())); - } - stream.println(" Stack:"); - for (StackTraceElement frame : info.getStackTrace()) { - stream.println(" " + frame.toString()); - } + stream.println(THREAD_BEAN.getThreadCount() + " active theads"); + for (ThreadInfo ti: THREAD_BEAN.dumpAllThreads(true, true)) { + stream.print(ti.toString()); } stream.flush(); } From e106cc8bbb3d74d876c9869a0b2fe77240e80af9 Mon Sep 17 00:00:00 2001 From: jja725 Date: Wed, 15 Mar 2023 15:45:44 -0700 Subject: [PATCH 192/334] Add request proto to add copy job ### What changes are proposed in this pull request? Initial commit to add proto to support copy job. Please ignore CopyJob.java since it's incomplete. ### Why are the changes needed? So we can work on user-facing command and backbone in parallel ### Does this PR introduce any user facing changes? na pr-link: Alluxio/alluxio#17088 change-id: cid-75815bc622b6f67cece9d3d5830c2db394b69a6e --- .../main/java/alluxio/job/CopyJobRequest.java | 82 ++ .../journal/JournalEntryAssociation.java | 3 +- .../journal/JournalEntryAssociationTest.java | 5 +- .../main/java/alluxio/master/job/CopyJob.java | 745 ++++++++++++++++++ .../alluxio/master/job/CopyJobFactory.java | 65 ++ .../master/job/JobFactoryProducer.java | 15 +- .../master/job/JournalCopyJobFactory.java | 59 ++ .../main/java/alluxio/master/job/LoadJob.java | 3 + .../main/proto/grpc/file_system_master.proto | 7 + core/transport/src/main/proto/proto.lock | 85 ++ .../src/main/proto/proto/journal/job.proto | 16 +- .../main/proto/proto/journal/journal.proto | 1 + 12 files changed, 1078 insertions(+), 8 deletions(-) create mode 100644 core/common/src/main/java/alluxio/job/CopyJobRequest.java create mode 100644 core/server/master/src/main/java/alluxio/master/job/CopyJob.java create mode 100644 core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java create mode 100644 
core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java diff --git a/core/common/src/main/java/alluxio/job/CopyJobRequest.java b/core/common/src/main/java/alluxio/job/CopyJobRequest.java new file mode 100644 index 000000000000..598cb72f9fe7 --- /dev/null +++ b/core/common/src/main/java/alluxio/job/CopyJobRequest.java @@ -0,0 +1,82 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.job; + +import alluxio.grpc.CopyJobPOptions; + +import com.fasterxml.jackson.annotation.JsonProperty; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.ThreadSafe; + +/** + * The request of copying files. 
+ */ +@ThreadSafe +public class CopyJobRequest implements JobRequest { + private static final String TYPE = "copy"; + private static final long serialVersionUID = -8565405317284410500L; + private final String mDst; + private final CopyJobPOptions mOptions; + private final String mSrc; + + /** + * @param src the source file path + * @param dst the destination file path + * @param options copy job options + **/ + public CopyJobRequest(@JsonProperty("src") String src, + @JsonProperty("dst") String dst, + @JsonProperty("copyJobPOptions") CopyJobPOptions options) { + mSrc = Preconditions.checkNotNull(src, "The source path cannot be null"); + + mDst = Preconditions.checkNotNull(dst, "The destination path cannot be null"); + mOptions = Preconditions.checkNotNull(options, "The job options cannot be null"); + } + + /** + * @return the source file path + */ + public String getSrc() { + return mSrc; + } + + /** + * @return the file path + */ + public String getDst() { + return mDst; + } + + /** + * @return job options + */ + public CopyJobPOptions getOptions() { + return mOptions; + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("Src", mSrc) + .add("Dst", mDst) + .add("Options", mOptions) + .toString(); + } + + @Override + public String getType() { + return TYPE; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java index 2e650cb0d3b8..03350192d481 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java +++ b/core/server/common/src/main/java/alluxio/master/journal/JournalEntryAssociation.java @@ -45,7 +45,8 @@ public static String getMasterForEntry(JournalEntry entry) { || entry.hasUpdateInode() || entry.hasUpdateInodeDirectory() || entry.hasUpdateInodeFile() - || entry.hasLoadJob()) { + || entry.hasLoadJob() + || entry.hasCopyJob()) { return 
Constants.FILE_SYSTEM_MASTER_NAME; } if (entry.hasBlockContainerIdGenerator() diff --git a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java index cff02dd0e428..3148b3239685 100644 --- a/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java +++ b/core/server/common/src/test/java/alluxio/master/journal/JournalEntryAssociationTest.java @@ -101,7 +101,10 @@ public class JournalEntryAssociationTest { JournalEntry.newBuilder().setCompleteTransformTable(Table.CompleteTransformTableEntry.getDefaultInstance()).build(), JournalEntry.newBuilder().setLoadJob(alluxio.proto.journal.Job.LoadJobEntry.newBuilder() .setLoadPath("/test").setState(alluxio.proto.journal.Job.PJobState.CREATED) - .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("1").build()).build() + .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("1").build()).build(), + JournalEntry.newBuilder().setCopyJob(alluxio.proto.journal.Job.CopyJobEntry.newBuilder() + .setSrc("/src").setDst("/dst").setState(alluxio.proto.journal.Job.PJobState.CREATED) + .setBandwidth(1).setPartialListing(false).setVerify(true).setJobId("2").build()).build() ); // CHECKSTYLE.OFF: LineLengthExceed diff --git a/core/server/master/src/main/java/alluxio/master/job/CopyJob.java b/core/server/master/src/main/java/alluxio/master/job/CopyJob.java new file mode 100644 index 000000000000..fa7803f57c96 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/CopyJob.java @@ -0,0 +1,745 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +import alluxio.client.block.stream.BlockWorkerClient; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.grpc.Block; +import alluxio.grpc.BlockStatus; +import alluxio.grpc.JobProgressReportFormat; +import alluxio.grpc.LoadRequest; +import alluxio.grpc.LoadResponse; +import alluxio.grpc.TaskStatus; +import alluxio.grpc.UfsReadOptions; +import alluxio.job.JobDescription; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.proto.journal.Journal; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; +import alluxio.util.FormatUtils; +import alluxio.wire.BlockInfo; +import alluxio.wire.FileInfo; +import alluxio.wire.WorkerInfo; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Meter; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.base.Objects; +import 
com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Predicate; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Load job that loads a file or a directory into Alluxio. + * This class should only be manipulated from the scheduler thread in Scheduler + * thus the state changing functions are not thread safe. + */ +@NotThreadSafe +public class CopyJob implements Job { + private static final Logger LOG = LoggerFactory.getLogger(CopyJob.class); + public static final String TYPE = "load"; + private static final double FAILURE_RATIO_THRESHOLD = 0.05; + private static final int FAILURE_COUNT_THRESHOLD = 100; + private static final int RETRY_BLOCK_CAPACITY = 1000; + private static final double RETRY_THRESHOLD = 0.8 * RETRY_BLOCK_CAPACITY; + private static final int BATCH_SIZE = Configuration.getInt(PropertyKey.JOB_BATCH_SIZE); + public static final Predicate QUALIFIED_FILE_FILTER = + (fileInfo) -> !fileInfo.isFolder() && fileInfo.isCompleted() && fileInfo.isPersisted() + && fileInfo.getInAlluxioPercentage() != 100; + // Job configurations + private final String mSrc; + private final String mDst; + private final Optional mUser; + + private OptionalLong mBandwidth; + private boolean mUsePartialListing; + private boolean mVerificationEnabled; + + // Job states + private final LinkedList mRetryBlocks = new LinkedList<>(); + private final Map mFailedFiles = new HashMap<>(); + private final long mStartTime; + private final 
AtomicLong mProcessedFileCount = new AtomicLong(); + private final AtomicLong mLoadedByteCount = new AtomicLong(); + private final AtomicLong mTotalByteCount = new AtomicLong(); + private final AtomicLong mTotalBlockCount = new AtomicLong(); + private final AtomicLong mCurrentBlockCount = new AtomicLong(); + private final AtomicLong mTotalFailureCount = new AtomicLong(); + private final AtomicLong mCurrentFailureCount = new AtomicLong(); + private final String mJobId; + private JobState mState; + private Optional mFailedReason = Optional.empty(); + private final Iterable mFileIterable; + private Optional> mFileIterator = Optional.empty(); + private FileInfo mCurrentFile; + private Iterator mBlockIterator = Collections.emptyIterator(); + private OptionalLong mEndTime = OptionalLong.empty(); + + /** + * Constructor. + * + * @param src file source + * @param dst file destination + * @param user user for authentication + * @param jobId job identifier + * @param bandwidth bandwidth + * @param usePartialListing whether to use partial listing + * @param verificationEnabled whether to verify the job after loaded + * @param fileIterable file iterable + */ + public CopyJob( + String src, + String dst, + Optional user, String jobId, OptionalLong bandwidth, + boolean usePartialListing, + boolean verificationEnabled, FileIterable fileIterable) { + mSrc = requireNonNull(src, "src is null"); + mDst = requireNonNull(dst, "dst is null"); + mUser = requireNonNull(user, "user is null"); + mJobId = requireNonNull(jobId, "jobId is null"); + Preconditions.checkArgument( + !bandwidth.isPresent() || bandwidth.getAsLong() > 0, + format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); + mBandwidth = bandwidth; + mUsePartialListing = usePartialListing; + mVerificationEnabled = verificationEnabled; + mStartTime = System.currentTimeMillis(); + mState = JobState.RUNNING; + mFileIterable = fileIterable; + } + + /** + * Get load file path. 
+ * @return file path + */ + public String getPath() { + return mSrc; + } + + /** + * Get user. + * @return user + */ + public Optional getUser() { + return mUser; + } + + @Override + public String getJobId() { + return mJobId; + } + + @Override + public JobDescription getDescription() { + return JobDescription.newBuilder().setPath(mSrc).setType(TYPE).build(); + } + + /** + * Get end time. + * @return end time + */ + @Override + public OptionalLong getEndTime() { + return mEndTime; + } + + /** + * Get bandwidth. + * @return the allocated bandwidth + */ + public OptionalLong getBandwidth() { + return mBandwidth; + } + + /** + * Update end time. + * @param time time in ms + */ + public void setEndTime(long time) { + mEndTime = OptionalLong.of(time); + } + + /** + * Update bandwidth. + * @param bandwidth new bandwidth + */ + public void updateBandwidth(OptionalLong bandwidth) { + mBandwidth = bandwidth; + } + + /** + * Is verification enabled. + * @return whether verification is enabled + */ + public boolean isVerificationEnabled() { + return mVerificationEnabled; + } + + /** + * Is verification enabled. + * + * @return whether verification is enabled + */ + @Override + public boolean needVerification() { + return mVerificationEnabled && mCurrentBlockCount.get() > 0; + } + + /** + * Enable verification. + * @param enableVerification whether to enable verification + */ + public void setVerificationEnabled(boolean enableVerification) { + mVerificationEnabled = enableVerification; + } + + /** + * Get load status. + * @return the load job's status + */ + @Override + public JobState getJobState() { + return mState; + } + + /** + * Set load state. 
+ * @param state new state + */ + @Override + public void setJobState(JobState state) { + LOG.debug("Change JobState to {} for job {}", state, this); + mState = state; + if (!isRunning()) { + mEndTime = OptionalLong.of(System.currentTimeMillis()); + } + if (state == JobState.SUCCEEDED) { + JOB_LOAD_SUCCESS.inc(); + } + } + + /** + * Set load state to FAILED with given reason. + * @param reason failure exception + */ + @Override + public void failJob(AlluxioRuntimeException reason) { + setJobState(JobState.FAILED); + mFailedReason = Optional.of(reason); + JOB_LOAD_FAIL.inc(); + } + + /** + * Add bytes to total loaded bytes. + * @param bytes bytes to be added to total + */ + @VisibleForTesting + public void addLoadedBytes(long bytes) { + mLoadedByteCount.addAndGet(bytes); + } + + @Override + public String getProgress(JobProgressReportFormat format, boolean verbose) { + return (new LoadProgressReport(this, verbose)).getReport(format); + } + + /** + * Get the processed block count in the current loading pass. 
+ * @return current block count + */ + public long getCurrentBlockCount() { + return mCurrentBlockCount.get(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CopyJob that = (CopyJob) o; + return Objects.equal(getDescription(), that.getDescription()); + } + + @Override + public int hashCode() { + return Objects.hashCode(getDescription()); + } + + @Override + public boolean isHealthy() { + long currentFailureCount = mCurrentFailureCount.get(); + return mState != JobState.FAILED + && currentFailureCount <= FAILURE_COUNT_THRESHOLD + || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; + } + + @Override + public boolean isRunning() { + return mState == JobState.RUNNING || mState == JobState.VERIFYING; + } + + @Override + public boolean isDone() { + return mState == JobState.SUCCEEDED || mState == JobState.FAILED; + } + + @Override + public boolean isCurrentPassDone() { + return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() + && mRetryBlocks.isEmpty(); + } + + @Override + public void initiateVerification() { + Preconditions.checkState(isCurrentPassDone(), "Previous pass is not finished"); + mFileIterator = Optional.empty(); + mTotalBlockCount.addAndGet(mCurrentBlockCount.get()); + mTotalFailureCount.addAndGet(mCurrentFailureCount.get()); + mCurrentBlockCount.set(0); + mCurrentFailureCount.set(0); + mState = JobState.VERIFYING; + } + + /** + * get next load task. + * + * @param worker blocker to worker + * @return the next task to run. If there is no task to run, return empty + */ + public Optional getNextTask(WorkerInfo worker) { + List blocks = getNextBatchBlocks(BATCH_SIZE); + if (blocks.isEmpty()) { + return Optional.empty(); + } + return Optional.of(new CopyTask(blocks)); + } + + /** + * Get next batch of blocks. 
+ * @param count number of blocks + * @return list of blocks + */ + @VisibleForTesting + public List getNextBatchBlocks(int count) { + if (!mFileIterator.isPresent()) { + mFileIterator = Optional.of(mFileIterable.iterator()); + if (!mFileIterator + .get() + .hasNext()) { + return ImmutableList.of(); + } + mCurrentFile = mFileIterator.get().next(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } + + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + ImmutableList.Builder batchBuilder = ImmutableList.builder(); + int i = 0; + // retry failed blocks if there's too many failed blocks otherwise wait until no more new block + if (mRetryBlocks.size() > RETRY_THRESHOLD + || (!mFileIterator.get().hasNext() && !mBlockIterator.hasNext())) { + while (i < count && !mRetryBlocks.isEmpty()) { + batchBuilder.add(requireNonNull(mRetryBlocks.removeFirst())); + i++; + } + } + for (; i < count; i++) { + if (!mBlockIterator.hasNext()) { + if (!mFileIterator.get().hasNext()) { + return batchBuilder.build(); + } + mCurrentFile = mFileIterator.get().next(); + if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { + mProcessedFileCount.incrementAndGet(); + } + mBlockIterator = mCurrentFile.getBlockIds().listIterator(); + } + long blockId = mBlockIterator.next(); + BlockInfo blockInfo = mCurrentFile.getFileBlockInfo(blockId).getBlockInfo(); + if (blockInfo.getLocations().isEmpty()) { + batchBuilder.add(buildBlock(mCurrentFile, blockId)); + mCurrentBlockCount.incrementAndGet(); + // would be inaccurate when we initial verification, and we retry un-retryable blocks + mTotalByteCount.addAndGet(blockInfo.getLength()); + } + } + return batchBuilder.build(); + } + + /** + * Add a block to retry later. 
+ * @param block the block that failed to load thus needing retry + * @return whether the block is successfully added + */ + @VisibleForTesting + public boolean addBlockToRetry(Block block) { + if (mRetryBlocks.size() >= RETRY_BLOCK_CAPACITY) { + return false; + } + LOG.debug("Retry block {}", block); + mRetryBlocks.add(block); + mCurrentFailureCount.incrementAndGet(); + JOB_LOAD_BLOCK_FAIL.inc(); + return true; + } + + /** + * Add a block to failure summary. + * + * @param block the block that failed to load and cannot be retried + * @param message failure message + * @param code status code for exception + */ + @VisibleForTesting + public void addBlockFailure(Block block, String message, int code) { + // When multiple blocks of the same file failed to load, from user's perspective, + // it's not hugely important what are the reasons for each specific failure, + // if they are different, so we will just keep the first one. + mFailedFiles.put(block.getUfsPath(), + format("Status code: %s, message: %s", code, message)); + mCurrentFailureCount.incrementAndGet(); + JOB_LOAD_BLOCK_FAIL.inc(); + } + + private static Block buildBlock(FileInfo fileInfo, long blockId) { + return Block.newBuilder().setBlockId(blockId) + .setLength(fileInfo.getFileBlockInfo(blockId).getBlockInfo().getLength()) + .setUfsPath(fileInfo.getUfsPath()) + .setMountId(fileInfo.getMountId()) + .setOffsetInFile(fileInfo.getFileBlockInfo(blockId).getOffset()) + .build(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("Src", mSrc) + .add("Dst", mDst) + .add("User", mUser) + .add("Bandwidth", mBandwidth) + .add("UsePartialListing", mUsePartialListing) + .add("VerificationEnabled", mVerificationEnabled) + .add("RetryBlocks", mRetryBlocks) + .add("FailedFiles", mFailedFiles) + .add("StartTime", mStartTime) + .add("ProcessedFileCount", mProcessedFileCount) + .add("LoadedByteCount", mLoadedByteCount) + .add("TotalBlockCount", mTotalBlockCount) + 
.add("CurrentBlockCount", mCurrentBlockCount) + .add("TotalFailureCount", mTotalFailureCount) + .add("CurrentFailureCount", mCurrentFailureCount) + .add("State", mState) + .add("BatchSize", BATCH_SIZE) + .add("FailedReason", mFailedReason) + .add("FileIterator", mFileIterator) + .add("CurrentFile", mCurrentFile) + .add("BlockIterator", mBlockIterator) + .add("EndTime", mEndTime) + .toString(); + } + + @Override + public Journal.JournalEntry toJournalEntry() { + alluxio.proto.journal.Job.CopyJobEntry.Builder jobEntry = alluxio.proto.journal.Job.CopyJobEntry + .newBuilder() + .setSrc(mSrc) + .setDst(mDst) + .setState(JobState.toProto(mState)) + .setPartialListing(mUsePartialListing) + .setVerify(mVerificationEnabled) + .setJobId(mJobId); + mUser.ifPresent(jobEntry::setUser); + mBandwidth.ifPresent(jobEntry::setBandwidth); + mEndTime.ifPresent(jobEntry::setEndTime); + return Journal.JournalEntry + .newBuilder() + .setCopyJob(jobEntry.build()) + .build(); + } + + /** + * Get duration in seconds. 
+ * @return job duration in seconds + */ + @VisibleForTesting + public long getDurationInSec() { + return (mEndTime.orElse(System.currentTimeMillis()) - mStartTime) / 1000; + } + + @Override + public boolean processResponse(CopyTask loadTask) { + try { + long totalBytes = loadTask.getBlocks().stream() + .map(Block::getLength) + .reduce(Long::sum) + .orElse(0L); + LoadResponse response = loadTask.getResponseFuture().get(); + if (response.getStatus() != TaskStatus.SUCCESS) { + LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); + for (BlockStatus status : response.getBlockStatusList()) { + totalBytes -= status.getBlock().getLength(); + if (!isHealthy() || !status.getRetryable() || !addBlockToRetry( + status.getBlock())) { + addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); + } + } + } + addLoadedBytes(totalBytes); + JOB_LOAD_BLOCK_COUNT.inc( + loadTask.getBlocks().size() - response.getBlockStatusCount()); + JOB_LOAD_BLOCK_SIZE.inc(totalBytes); + JOB_LOAD_RATE.mark(totalBytes); + return response.getStatus() != TaskStatus.FAILURE; + } + catch (ExecutionException e) { + LOG.warn("exception when trying to get load response.", e.getCause()); + for (Block block : loadTask.getBlocks()) { + if (isHealthy()) { + addBlockToRetry(block); + } + else { + AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); + addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() + .value()); + } + } + return false; + } + catch (CancellationException e) { + LOG.warn("Task get canceled and will retry.", e); + loadTask.getBlocks().forEach(this::addBlockToRetry); + return true; + } + catch (InterruptedException e) { + loadTask.getBlocks().forEach(this::addBlockToRetry); + Thread.currentThread().interrupt(); + // We don't count InterruptedException as task failure + return true; + } + } + + @Override + public void updateJob(Job job) { + if (!(job instanceof CopyJob)) { + throw new 
IllegalArgumentException("Job is not a LoadJob: " + job); + } + CopyJob targetJob = (CopyJob) job; + updateBandwidth(targetJob.getBandwidth()); + setVerificationEnabled(targetJob.isVerificationEnabled()); + } + + /** + * Loads blocks in a UFS through an Alluxio worker. + */ + public class CopyTask extends Task { + + /** + * @return blocks to load + */ + public List getBlocks() { + return mBlocks; + } + + private final List mBlocks; + + /** + * Creates a new instance of {@link CopyTask}. + * + * @param blocks blocks to load + */ + public CopyTask(List blocks) { + mBlocks = blocks; + } + + @Override + public ListenableFuture run(BlockWorkerClient workerClient) { + LoadRequest.Builder request1 = LoadRequest + .newBuilder() + .addAllBlocks(mBlocks); + UfsReadOptions.Builder options = UfsReadOptions + .newBuilder() + .setTag(mJobId) + .setPositionShort(false); + if (mBandwidth.isPresent()) { + options.setBandwidth(mBandwidth.getAsLong()); + } + mUser.ifPresent(options::setUser); + LoadRequest request = request1 + .setOptions(options.build()) + .build(); + return workerClient.load(request); + } + } + + private static class LoadProgressReport { + private final boolean mVerbose; + private final JobState mJobState; + private final Long mBandwidth; + private final boolean mVerificationEnabled; + private final long mProcessedFileCount; + private final long mLoadedByteCount; + private final Long mTotalByteCount; + private final Long mThroughput; + private final double mFailurePercentage; + private final AlluxioRuntimeException mFailureReason; + private final long mFailedFileCount; + private final Map mFailedFilesWithReasons; + + public LoadProgressReport(CopyJob job, boolean verbose) + { + mVerbose = verbose; + mJobState = job.mState; + mBandwidth = job.mBandwidth.isPresent() ? 
job.mBandwidth.getAsLong() : null; + mVerificationEnabled = job.mVerificationEnabled; + mProcessedFileCount = job.mProcessedFileCount.get(); + mLoadedByteCount = job.mLoadedByteCount.get(); + if (!job.mUsePartialListing && job.mFileIterator.isPresent()) { + mTotalByteCount = job.mTotalByteCount.get(); + } + else { + mTotalByteCount = null; + } + long duration = job.getDurationInSec(); + if (duration > 0) { + mThroughput = job.mLoadedByteCount.get() / duration; + } + else { + mThroughput = null; + } + long blockCount = job.mTotalBlockCount.get() + job.mCurrentBlockCount.get(); + if (blockCount > 0) { + mFailurePercentage = + ((double) (job.mTotalFailureCount.get() + job.mCurrentFailureCount.get()) / blockCount) + * 100; + } + else { + mFailurePercentage = 0; + } + mFailureReason = job.mFailedReason.orElse(null); + mFailedFileCount = job.mFailedFiles.size(); + if (verbose && mFailedFileCount > 0) { + mFailedFilesWithReasons = job.mFailedFiles; + } else { + mFailedFilesWithReasons = null; + } + } + + public String getReport(JobProgressReportFormat format) + { + switch (format) { + case TEXT: + return getTextReport(); + case JSON: + return getJsonReport(); + default: + throw new InvalidArgumentRuntimeException( + format("Unknown load progress report format: %s", format)); + } + } + + private String getTextReport() { + StringBuilder progress = new StringBuilder(); + progress.append( + format("\tSettings:\tbandwidth: %s\tverify: %s%n", + mBandwidth == null ? "unlimited" : mBandwidth, + mVerificationEnabled)); + progress.append(format("\tJob State: %s%s%n", mJobState, + mFailureReason == null + ? 
"" : format( + " (%s: %s)", + mFailureReason.getClass().getName(), + mFailureReason.getMessage()))); + if (mVerbose && mFailureReason != null) { + for (StackTraceElement stack : mFailureReason.getStackTrace()) { + progress.append(format("\t\t%s%n", stack.toString())); + } + } + progress.append(format("\tFiles Processed: %d%n", mProcessedFileCount)); + progress.append(format("\tBytes Loaded: %s%s%n", + FormatUtils.getSizeFromBytes(mLoadedByteCount), + mTotalByteCount == null + ? "" : format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); + if (mThroughput != null) { + progress.append(format("\tThroughput: %s/s%n", + FormatUtils.getSizeFromBytes(mThroughput))); + } + progress.append(format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); + progress.append(format("\tFiles Failed: %s%n", mFailedFileCount)); + if (mVerbose && mFailedFilesWithReasons != null) { + mFailedFilesWithReasons.forEach((fileName, reason) -> + progress.append(format("\t\t%s: %s%n", fileName, reason))); + } + return progress.toString(); + } + + private String getJsonReport() { + try { + return new ObjectMapper() + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY) + .setSerializationInclusion(JsonInclude.Include.NON_NULL) + .writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new InternalRuntimeException("Failed to convert LoadProgressReport to JSON", e); + } + } + } + + // metrics + public static final Counter JOB_LOAD_SUCCESS = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); + public static final Counter JOB_LOAD_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_COUNT = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); + public static final Counter JOB_LOAD_BLOCK_FAIL = + MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); + public static final Counter JOB_LOAD_BLOCK_SIZE = + 
MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); + public static final Meter JOB_LOAD_RATE = + MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); +} diff --git a/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java new file mode 100644 index 000000000000..483325797cdb --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.grpc.CopyJobPOptions; +import alluxio.job.CopyJobRequest; +import alluxio.master.file.FileSystemMaster; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobFactory; +import alluxio.security.User; +import alluxio.security.authentication.AuthenticatedClientUser; + +import java.util.Optional; +import java.util.OptionalLong; +import java.util.UUID; + +/** + * Factory for creating {@link CopyJob}s that get file infos from master. + */ +public class CopyJobFactory implements JobFactory { + + private final FileSystemMaster mFsMaster; + private final CopyJobRequest mRequest; + + /** + * Create factory. 
+ * @param request load job request + * @param fsMaster file system master + */ + public CopyJobFactory(CopyJobRequest request, FileSystemMaster fsMaster) { + mFsMaster = fsMaster; + mRequest = request; + } + + @Override + public Job create() { + CopyJobPOptions options = mRequest.getOptions(); + String src = mRequest.getSrc(); + OptionalLong bandwidth = + options.hasBandwidth() ? OptionalLong.of(options.getBandwidth()) : OptionalLong.empty(); + boolean partialListing = options.hasPartialListing() && options.getPartialListing(); + boolean verificationEnabled = options.hasVerify() && options.getVerify(); + FileIterable fileIterator = new FileIterable(mFsMaster, src, Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName), partialListing, + LoadJob.QUALIFIED_FILE_FILTER); + Optional user = Optional + .ofNullable(AuthenticatedClientUser.getOrNull()) + .map(User::getName); + return new CopyJob(src, mRequest.getDst(), user, UUID.randomUUID().toString(), + bandwidth, + partialListing, + verificationEnabled, fileIterator); + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java index dc9e50c743ca..6604925c650e 100644 --- a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java +++ b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java @@ -11,6 +11,7 @@ package alluxio.master.job; +import alluxio.job.CopyJobRequest; import alluxio.job.JobRequest; import alluxio.job.LoadJobRequest; import alluxio.master.file.FileSystemMaster; @@ -29,12 +30,13 @@ private JobFactoryProducer() {} // prevent instantiation * @return the job factory */ public static JobFactory create(JobRequest request, FileSystemMaster fsMaster) { - switch (request.getType()) { - case "load": - return new LoadJobFactory((LoadJobRequest) request, fsMaster); - default: - throw new IllegalArgumentException("Unknown job type: " + 
request.getType()); + if (request instanceof LoadJobRequest) { + return new LoadJobFactory((LoadJobRequest) request, fsMaster); } + if (request instanceof CopyJobRequest) { + return new CopyJobFactory((CopyJobRequest) request, fsMaster); + } + throw new IllegalArgumentException("Unknown job type: " + request.getType()); } /** @@ -46,6 +48,9 @@ public static JobFactory create(Journal.JournalEntry entry, FileSystemMaster fsM if (entry.hasLoadJob()) { return new JournalLoadJobFactory(entry.getLoadJob(), fsMaster); } + if (entry.hasCopyJob()) { + return new JournalCopyJobFactory(entry.getCopyJob(), fsMaster); + } else { throw new IllegalArgumentException("Unknown job type: " + entry); } diff --git a/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java new file mode 100644 index 000000000000..24d365ecff8f --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.job; + +import alluxio.master.file.FileSystemMaster; +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobFactory; +import alluxio.scheduler.job.JobState; + +import java.util.Optional; +import java.util.OptionalLong; + +/** + * Factory for creating {@link CopyJob}s from journal entries. 
+ */ +public class JournalCopyJobFactory implements JobFactory { + + private final FileSystemMaster mFsMaster; + + private final alluxio.proto.journal.Job.CopyJobEntry mJobEntry; + + /** + * Create factory. + * @param journalEntry journal entry + * @param fsMaster file system master + */ + public JournalCopyJobFactory(alluxio.proto.journal.Job.CopyJobEntry journalEntry, + FileSystemMaster fsMaster) { + mFsMaster = fsMaster; + mJobEntry = journalEntry; + } + + @Override + public Job create() { + Optional user = + mJobEntry.hasUser() ? Optional.of(mJobEntry.getUser()) : Optional.empty(); + FileIterable fileIterator = + new FileIterable(mFsMaster, mJobEntry.getSrc(), user, mJobEntry.getPartialListing(), + LoadJob.QUALIFIED_FILE_FILTER); + CopyJob job = new CopyJob(mJobEntry.getSrc(), mJobEntry.getDst(), user, mJobEntry.getJobId(), + mJobEntry.hasBandwidth() ? OptionalLong.of(mJobEntry.getBandwidth()) : OptionalLong.empty(), + mJobEntry.getPartialListing(), mJobEntry.getVerify(), fileIterator); + job.setJobState(JobState.fromProto(mJobEntry.getState())); + if (mJobEntry.hasEndTime()) { + job.setEndTime(mJobEntry.getEndTime()); + } + return job; + } +} + diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java index 3aee54e7a9e4..854ca3a6241b 100644 --- a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java @@ -573,6 +573,9 @@ public boolean processResponse(LoadTask loadTask) { @Override public void updateJob(Job job) { + if (!(job instanceof LoadJob)) { + throw new IllegalArgumentException("Job is not a LoadJob: " + job); + } LoadJob targetJob = (LoadJob) job; updateBandwidth(targetJob.getBandwidth()); setVerificationEnabled(targetJob.isVerificationEnabled()); diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 
b982be78897f..c6a6c4099359 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -616,6 +616,13 @@ message LoadJobPOptions { optional bool partialListing = 3; } +message CopyJobPOptions { + optional int64 bandwidth = 1; + optional bool verify = 2; + optional bool partialListing = 3; + optional bool overwrite = 4; +} + message StopJobPRequest { required JobDescription jobDescription = 1; } diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 56f4a25c5854..cff9aab24eb9 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -3833,6 +3833,31 @@ } ] }, + { + "name": "CopyJobPOptions", + "fields": [ + { + "id": 1, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 2, + "name": "verify", + "type": "bool" + }, + { + "id": 3, + "name": "partialListing", + "type": "bool" + }, + { + "id": 4, + "name": "overwrite", + "type": "bool" + } + ] + }, { "name": "StopJobPRequest", "fields": [ @@ -9754,6 +9779,61 @@ "type": "int64" } ] + }, + { + "name": "CopyJobEntry", + "fields": [ + { + "id": 1, + "name": "src", + "type": "string" + }, + { + "id": 2, + "name": "dst", + "type": "string" + }, + { + "id": 3, + "name": "state", + "type": "PJobState" + }, + { + "id": 4, + "name": "bandwidth", + "type": "int64" + }, + { + "id": 5, + "name": "verify", + "type": "bool" + }, + { + "id": 6, + "name": "user", + "type": "string" + }, + { + "id": 7, + "name": "partialListing", + "type": "bool" + }, + { + "id": 8, + "name": "job_id", + "type": "string" + }, + { + "id": 9, + "name": "end_time", + "type": "int64" + }, + { + "id": 10, + "name": "overwrite", + "type": "int64" + } + ] } ], "package": { @@ -9978,6 +10058,11 @@ "name": "load_job", "type": "LoadJobEntry" }, + { + "id": 54, + "name": "copy_job", + "type": "CopyJobEntry" + }, { "id": 39, "name": "journal_entries", diff --git 
a/core/transport/src/main/proto/proto/journal/job.proto b/core/transport/src/main/proto/proto/journal/job.proto index 9496f3cc21a1..dc7f5df48e3d 100644 --- a/core/transport/src/main/proto/proto/journal/job.proto +++ b/core/transport/src/main/proto/proto/journal/job.proto @@ -10,7 +10,7 @@ enum PJobState { FAILED = 4; } -// next available id: 8 +// next available id: 9 message LoadJobEntry { required string load_path = 1; required PJobState state = 2; @@ -21,3 +21,17 @@ message LoadJobEntry { required string job_id = 7; optional int64 end_time = 8; } + +// next available id: 11 +message CopyJobEntry { + required string src = 1; + required string dst = 2; + required PJobState state = 3; + optional int64 bandwidth = 4; + required bool verify = 5; + optional string user = 6; + required bool partialListing = 7; + required string job_id = 8; + optional int64 end_time = 9; + optional int64 overwrite = 10; +} diff --git a/core/transport/src/main/proto/proto/journal/journal.proto b/core/transport/src/main/proto/proto/journal/journal.proto index 023f03879193..3b91319a1ece 100644 --- a/core/transport/src/main/proto/proto/journal/journal.proto +++ b/core/transport/src/main/proto/proto/journal/journal.proto @@ -68,6 +68,7 @@ message JournalEntry { optional UpdateInodeDirectoryEntry update_inode_directory = 36; optional UpdateInodeFileEntry update_inode_file = 37; optional LoadJobEntry load_job = 53; + optional CopyJobEntry copy_job = 54; // This journal entry is a list of other entries. when a journal entry // contains other journal entries, all other optional fields must be unset. From cbf24bf4184f188aa5c36aceddf60ea8aaca2a4c Mon Sep 17 00:00:00 2001 From: Tyler Crain <tyler.crain@rwaltz.com> Date: Wed, 15 Mar 2023 21:34:20 -0700 Subject: [PATCH 193/334] Fix retry function and S3 abort upload ### What changes are proposed in this pull request? This fixes the retry utils function so it throws an IOException when the retries run out. 
Currently it throws a null pointer exception as it tries to throw a null exception. It also fixes the retries in the low level object output stream. pr-link: Alluxio/alluxio#17094 change-id: cid-26a0c62fbad16aad30f4bec96c8dc8ddbfba6c09 --- .../main/java/alluxio/retry/RetryUtils.java | 11 +++++--- .../underfs/ObjectLowLevelOutputStream.java | 27 ++++++++++++------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/core/common/src/main/java/alluxio/retry/RetryUtils.java b/core/common/src/main/java/alluxio/retry/RetryUtils.java index f07530aec288..7dce3d8891c3 100644 --- a/core/common/src/main/java/alluxio/retry/RetryUtils.java +++ b/core/common/src/main/java/alluxio/retry/RetryUtils.java @@ -30,7 +30,8 @@ public final class RetryUtils { /** * Retries the given method until it doesn't throw an IO exception or the retry policy expires. If - * the retry policy expires, the last exception generated will be rethrown. + * the retry policy expires, the last exception generated will be rethrown. If no retry succeeds + * then a default IO Exception will be thrown. 
* * @param action a description of the action that fits the phrase "Failed to ${action}" * @param f the function to retry @@ -45,10 +46,14 @@ public static void retry(String action, RunnableThrowsIOException f, RetryPolicy return; } catch (IOException ioe) { e = ioe; - LOG.warn("Failed to {} (attempt {}): {}", action, policy.getAttemptCount(), e.toString()); + LOG.debug("Failed to {} (attempt {}): {}", action, policy.getAttemptCount(), e.toString()); } } - throw e; + if (e != null) { + throw e; + } + throw new IOException(String.format("Failed to run action %s after %d attempts", + action, policy.getAttemptCount())); } /** diff --git a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java index 123b77aae031..a5d3bd1a1ebb 100644 --- a/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectLowLevelOutputStream.java @@ -45,6 +45,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -94,7 +95,7 @@ public abstract class ObjectLowLevelOutputStream extends OutputStream protected final String mKey; /** The retry policy of this multipart upload. */ - protected final RetryPolicy mRetryPolicy = new CountingRetry(5); + protected final Supplier mRetryPolicy = () -> new CountingRetry(5); /** Pre-allocated byte buffer for writing single characters. 
*/ protected final byte[] mSingleCharWrite = new byte[1]; @@ -237,13 +238,13 @@ public void close() throws IOException { if (mFile == null) { LOG.debug("Streaming upload output stream closed without uploading any data."); RetryUtils.retry("put empty object for key" + mKey, () -> createEmptyObject(mKey), - mRetryPolicy); + mRetryPolicy.get()); } else { try { mLocalOutputStream.close(); final String md5 = mHash != null ? Base64.encodeBase64String(mHash.digest()) : null; RetryUtils.retry("put object for key" + mKey, () -> putObject(mKey, mFile, md5), - mRetryPolicy); + mRetryPolicy.get()); } finally { if (!mFile.delete()) { LOG.error("Failed to delete temporary file @ {}", mFile.getPath()); @@ -262,7 +263,7 @@ public void close() throws IOException { waitForAllPartsUpload(); RetryUtils.retry("complete multipart upload", - this::completeMultiPartUploadInternal, mRetryPolicy); + this::completeMultiPartUploadInternal, mRetryPolicy.get()); } catch (Exception e) { LOG.error("Failed to upload {}", mKey, e); throw new IOException(e); @@ -302,7 +303,8 @@ protected void uploadPart() throws IOException { return; } if (!mMultiPartUploadInitialized) { - RetryUtils.retry("init multipart upload", this::initMultiPartUploadInternal, mRetryPolicy); + RetryUtils.retry("init multipart upload", this::initMultiPartUploadInternal, + mRetryPolicy.get()); mMultiPartUploadInitialized = true; } mLocalOutputStream.close(); @@ -317,7 +319,7 @@ protected void uploadPart(File file, int partNumber, boolean lastPart) { Callable callable = () -> { try { RetryUtils.retry("upload part for key " + mKey + " and part number " + partNumber, - () -> uploadPartInternal(file, partNumber, lastPart, md5), mRetryPolicy); + () -> uploadPartInternal(file, partNumber, lastPart, md5), mRetryPolicy.get()); return null; } finally { // Delete the uploaded or failed to upload file @@ -333,9 +335,16 @@ protected void uploadPart(File file, int partNumber, boolean lastPart) { mKey, partNumber, file.getPath(), 
file.length(), lastPart); } - protected void abortMultiPartUpload() throws IOException { - RetryUtils.retry("abort multipart upload for key " + mKey, this::abortMultiPartUploadInternal, - mRetryPolicy); + protected void abortMultiPartUpload() { + try { + RetryUtils.retry("abort multipart upload for key " + mKey, this::abortMultiPartUploadInternal, + mRetryPolicy.get()); + } catch (IOException e) { + LOG.warn("Unable to abort multipart upload for key '{}' and id '{}' to bucket {}. " + + "You may need to enable the periodical cleanup by setting property {}" + + "to be true.", mKey, mBucketName, + PropertyKey.UNDERFS_CLEANUP_ENABLED.getName(), e); + } } protected void waitForAllPartsUpload() throws IOException { From b59df3c479c0b82a6d56034d9a4554d970d1046c Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Wed, 15 Mar 2023 21:35:27 -0700 Subject: [PATCH 194/334] Fix content hash for GCS v2 stream The content hash for the fingerprint was being calculated using the wrong value for the GCS v2 stream. This fixes it. 
pr-link: Alluxio/alluxio#17089 change-id: cid-270490db06b1748c440118f1e1bb18020f3b266e --- .../java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java b/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java index 935537db4e8b..7d0ac4ec95a9 100644 --- a/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java +++ b/underfs/gcs/src/main/java/alluxio/underfs/gcs/v2/GCSV2OutputStream.java @@ -29,7 +29,6 @@ import java.nio.channels.ClosedChannelException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.Base64; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.NotThreadSafe; @@ -67,6 +66,8 @@ public final class GCSV2OutputStream extends OutputStream implements ContentHash /** Flag to indicate this stream has been closed, to ensure close is only done once. */ private AtomicBoolean mClosed = new AtomicBoolean(false); + private String mContentHash; + /** * Constructs a new stream for writing a file. 
* @@ -150,6 +151,7 @@ public void close() throws IOException { throw new IOException(String .format("Failed to create empty object %s in %s", mKey, mBucketName)); } + mContentHash = blob.getMd5(); } } catch (ClosedChannelException e) { LOG.error("Channel already closed, possible duplicate close call.", e); @@ -170,8 +172,8 @@ private void createWriteChannel() throws IOException { @Override public Optional getContentHash() { - if (mHash != null) { - return Optional.of(Base64.getEncoder().encodeToString(mHash.digest())); + if (mContentHash != null) { + return Optional.of(mContentHash); } return Optional.empty(); } From 8e08e7750621ae60731c6fcbfdbdc110eca1c41f Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Fri, 17 Mar 2023 07:04:31 +0800 Subject: [PATCH 195/334] Remove some unused code in AlluxioMasterRestServiceHandler ### What changes are proposed in this pull request? The purpose of this PR is to remove some unused code in AlluxioMasterRestServiceHandler. ### Why are the changes needed? AlluxioMasterRestServiceHandler contains some unused (commented-out) code, which should be removed. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3.
webui pr-link: Alluxio/alluxio#17049 change-id: cid-af63274b15a8cedef202ade0a7f63f73d6e4b53d --- .../alluxio/master/meta/AlluxioMasterRestServiceHandler.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index 124677587af4..3898a6ef4bd0 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -696,9 +696,6 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques } response.setDebug(Configuration.getBoolean(PropertyKey.DEBUG)).setInvalidPathError("") .setViewingOffset(0).setCurrentPath(""); - //response.setDownloadLogFile(1); - //response.setBaseUrl("./browseLogs"); - //response.setShowPermissions(false); String logsPath = Configuration.getString(PropertyKey.LOGS_DIR); File logsDir = new File(logsPath); @@ -743,7 +740,6 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques } } else { // Request a specific log file. - // Only allow filenames as the path, to avoid arbitrary local path lookups. requestFile = new File(requestFile).getName(); response.setCurrentPath(requestFile); From 71521f631d1bd9c15ffe05634a7251014f4be1a0 Mon Sep 17 00:00:00 2001 From: jja725 Date: Thu, 16 Mar 2023 16:23:53 -0700 Subject: [PATCH 196/334] Add abstract job ### What changes are proposed in this pull request? Add abstract job and delete copy implementation ### Why are the changes needed? Please clarify why the changes are needed. For instance, clean code ### Does this PR introduce any user facing changes? 
na pr-link: Alluxio/alluxio#17103 change-id: cid-af24bee40f5998adb59ced8642f038591b0d7c96 --- .../java/alluxio/master/job/AbstractJob.java | 112 +++ .../main/java/alluxio/master/job/CopyJob.java | 745 ------------------ .../alluxio/master/job/CopyJobFactory.java | 65 -- .../master/job/JobFactoryProducer.java | 7 - .../master/job/JournalCopyJobFactory.java | 59 -- .../main/java/alluxio/master/job/LoadJob.java | 100 +-- 6 files changed, 127 insertions(+), 961 deletions(-) create mode 100644 core/server/master/src/main/java/alluxio/master/job/AbstractJob.java delete mode 100644 core/server/master/src/main/java/alluxio/master/job/CopyJob.java delete mode 100644 core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java delete mode 100644 core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java diff --git a/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java new file mode 100644 index 000000000000..ee75c5fe9bb3 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java @@ -0,0 +1,112 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.job; + +import static java.util.Objects.requireNonNull; + +import alluxio.scheduler.job.Job; +import alluxio.scheduler.job.JobState; +import alluxio.scheduler.job.Task; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; +import java.util.OptionalLong; + +/** + * Abstract class for job. It provides basic job information and state management. + * + * @param the type of the task of the job + */ +public abstract class AbstractJob> implements Job { + private static final Logger LOG = LoggerFactory.getLogger(LoadJob.class); + protected final String mJobId; + protected JobState mState; + protected OptionalLong mEndTime = OptionalLong.empty(); + protected final long mStartTime; + protected final Optional mUser; + + /** + * Creates a new instance of {@link AbstractJob}. + * + * @param user the user who submitted the job + * @param jobId the job id + */ + public AbstractJob(Optional user, String jobId) { + mUser = requireNonNull(user, "user is null"); + mJobId = requireNonNull(jobId, "jobId is null"); + mState = JobState.RUNNING; + mStartTime = System.currentTimeMillis(); + } + + @Override + public String getJobId() { + return mJobId; + } + + /** + * Get end time. + * + * @return end time + */ + @Override + public OptionalLong getEndTime() { + return mEndTime; + } + + /** + * Update end time. + * + * @param time time in ms + */ + public void setEndTime(long time) { + mEndTime = OptionalLong.of(time); + } + + /** + * Get load status. + * + * @return the load job's status + */ + @Override + public JobState getJobState() { + return mState; + } + + /** + * Set load state. 
+ * + * @param state new state + */ + @Override + public void setJobState(JobState state) { + LOG.debug("Change JobState to {} for job {}", state, this); + mState = state; + if (!isRunning()) { + mEndTime = OptionalLong.of(System.currentTimeMillis()); + } + if (state == JobState.SUCCEEDED) { + LoadJob.JOB_LOAD_SUCCESS.inc(); + } + } + + @Override + public boolean isRunning() { + return mState == JobState.RUNNING || mState == JobState.VERIFYING; + } + + @Override + public boolean isDone() { + return mState == JobState.SUCCEEDED || mState == JobState.FAILED; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/job/CopyJob.java b/core/server/master/src/main/java/alluxio/master/job/CopyJob.java deleted file mode 100644 index fa7803f57c96..000000000000 --- a/core/server/master/src/main/java/alluxio/master/job/CopyJob.java +++ /dev/null @@ -1,745 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.job; - -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; - -import alluxio.client.block.stream.BlockWorkerClient; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.runtime.AlluxioRuntimeException; -import alluxio.exception.runtime.InternalRuntimeException; -import alluxio.exception.runtime.InvalidArgumentRuntimeException; -import alluxio.grpc.Block; -import alluxio.grpc.BlockStatus; -import alluxio.grpc.JobProgressReportFormat; -import alluxio.grpc.LoadRequest; -import alluxio.grpc.LoadResponse; -import alluxio.grpc.TaskStatus; -import alluxio.grpc.UfsReadOptions; -import alluxio.job.JobDescription; -import alluxio.metrics.MetricKey; -import alluxio.metrics.MetricsSystem; -import alluxio.proto.journal.Journal; -import alluxio.scheduler.job.Job; -import alluxio.scheduler.job.JobState; -import alluxio.scheduler.job.Task; -import alluxio.util.FormatUtils; -import alluxio.wire.BlockInfo; -import alluxio.wire.FileInfo; -import alluxio.wire.WorkerInfo; - -import com.codahale.metrics.Counter; -import com.codahale.metrics.Meter; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import com.google.common.util.concurrent.ListenableFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import 
java.util.OptionalLong; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Predicate; -import javax.annotation.concurrent.NotThreadSafe; - -/** - * Load job that loads a file or a directory into Alluxio. - * This class should only be manipulated from the scheduler thread in Scheduler - * thus the state changing functions are not thread safe. - */ -@NotThreadSafe -public class CopyJob implements Job { - private static final Logger LOG = LoggerFactory.getLogger(CopyJob.class); - public static final String TYPE = "load"; - private static final double FAILURE_RATIO_THRESHOLD = 0.05; - private static final int FAILURE_COUNT_THRESHOLD = 100; - private static final int RETRY_BLOCK_CAPACITY = 1000; - private static final double RETRY_THRESHOLD = 0.8 * RETRY_BLOCK_CAPACITY; - private static final int BATCH_SIZE = Configuration.getInt(PropertyKey.JOB_BATCH_SIZE); - public static final Predicate QUALIFIED_FILE_FILTER = - (fileInfo) -> !fileInfo.isFolder() && fileInfo.isCompleted() && fileInfo.isPersisted() - && fileInfo.getInAlluxioPercentage() != 100; - // Job configurations - private final String mSrc; - private final String mDst; - private final Optional mUser; - - private OptionalLong mBandwidth; - private boolean mUsePartialListing; - private boolean mVerificationEnabled; - - // Job states - private final LinkedList mRetryBlocks = new LinkedList<>(); - private final Map mFailedFiles = new HashMap<>(); - private final long mStartTime; - private final AtomicLong mProcessedFileCount = new AtomicLong(); - private final AtomicLong mLoadedByteCount = new AtomicLong(); - private final AtomicLong mTotalByteCount = new AtomicLong(); - private final AtomicLong mTotalBlockCount = new AtomicLong(); - private final AtomicLong mCurrentBlockCount = new AtomicLong(); - private final AtomicLong mTotalFailureCount = new AtomicLong(); - private final AtomicLong 
mCurrentFailureCount = new AtomicLong(); - private final String mJobId; - private JobState mState; - private Optional mFailedReason = Optional.empty(); - private final Iterable mFileIterable; - private Optional> mFileIterator = Optional.empty(); - private FileInfo mCurrentFile; - private Iterator mBlockIterator = Collections.emptyIterator(); - private OptionalLong mEndTime = OptionalLong.empty(); - - /** - * Constructor. - * - * @param src file source - * @param dst file destination - * @param user user for authentication - * @param jobId job identifier - * @param bandwidth bandwidth - * @param usePartialListing whether to use partial listing - * @param verificationEnabled whether to verify the job after loaded - * @param fileIterable file iterable - */ - public CopyJob( - String src, - String dst, - Optional user, String jobId, OptionalLong bandwidth, - boolean usePartialListing, - boolean verificationEnabled, FileIterable fileIterable) { - mSrc = requireNonNull(src, "src is null"); - mDst = requireNonNull(dst, "dst is null"); - mUser = requireNonNull(user, "user is null"); - mJobId = requireNonNull(jobId, "jobId is null"); - Preconditions.checkArgument( - !bandwidth.isPresent() || bandwidth.getAsLong() > 0, - format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); - mBandwidth = bandwidth; - mUsePartialListing = usePartialListing; - mVerificationEnabled = verificationEnabled; - mStartTime = System.currentTimeMillis(); - mState = JobState.RUNNING; - mFileIterable = fileIterable; - } - - /** - * Get load file path. - * @return file path - */ - public String getPath() { - return mSrc; - } - - /** - * Get user. - * @return user - */ - public Optional getUser() { - return mUser; - } - - @Override - public String getJobId() { - return mJobId; - } - - @Override - public JobDescription getDescription() { - return JobDescription.newBuilder().setPath(mSrc).setType(TYPE).build(); - } - - /** - * Get end time. 
- * @return end time - */ - @Override - public OptionalLong getEndTime() { - return mEndTime; - } - - /** - * Get bandwidth. - * @return the allocated bandwidth - */ - public OptionalLong getBandwidth() { - return mBandwidth; - } - - /** - * Update end time. - * @param time time in ms - */ - public void setEndTime(long time) { - mEndTime = OptionalLong.of(time); - } - - /** - * Update bandwidth. - * @param bandwidth new bandwidth - */ - public void updateBandwidth(OptionalLong bandwidth) { - mBandwidth = bandwidth; - } - - /** - * Is verification enabled. - * @return whether verification is enabled - */ - public boolean isVerificationEnabled() { - return mVerificationEnabled; - } - - /** - * Is verification enabled. - * - * @return whether verification is enabled - */ - @Override - public boolean needVerification() { - return mVerificationEnabled && mCurrentBlockCount.get() > 0; - } - - /** - * Enable verification. - * @param enableVerification whether to enable verification - */ - public void setVerificationEnabled(boolean enableVerification) { - mVerificationEnabled = enableVerification; - } - - /** - * Get load status. - * @return the load job's status - */ - @Override - public JobState getJobState() { - return mState; - } - - /** - * Set load state. - * @param state new state - */ - @Override - public void setJobState(JobState state) { - LOG.debug("Change JobState to {} for job {}", state, this); - mState = state; - if (!isRunning()) { - mEndTime = OptionalLong.of(System.currentTimeMillis()); - } - if (state == JobState.SUCCEEDED) { - JOB_LOAD_SUCCESS.inc(); - } - } - - /** - * Set load state to FAILED with given reason. - * @param reason failure exception - */ - @Override - public void failJob(AlluxioRuntimeException reason) { - setJobState(JobState.FAILED); - mFailedReason = Optional.of(reason); - JOB_LOAD_FAIL.inc(); - } - - /** - * Add bytes to total loaded bytes. 
- * @param bytes bytes to be added to total - */ - @VisibleForTesting - public void addLoadedBytes(long bytes) { - mLoadedByteCount.addAndGet(bytes); - } - - @Override - public String getProgress(JobProgressReportFormat format, boolean verbose) { - return (new LoadProgressReport(this, verbose)).getReport(format); - } - - /** - * Get the processed block count in the current loading pass. - * @return current block count - */ - public long getCurrentBlockCount() { - return mCurrentBlockCount.get(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - CopyJob that = (CopyJob) o; - return Objects.equal(getDescription(), that.getDescription()); - } - - @Override - public int hashCode() { - return Objects.hashCode(getDescription()); - } - - @Override - public boolean isHealthy() { - long currentFailureCount = mCurrentFailureCount.get(); - return mState != JobState.FAILED - && currentFailureCount <= FAILURE_COUNT_THRESHOLD - || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; - } - - @Override - public boolean isRunning() { - return mState == JobState.RUNNING || mState == JobState.VERIFYING; - } - - @Override - public boolean isDone() { - return mState == JobState.SUCCEEDED || mState == JobState.FAILED; - } - - @Override - public boolean isCurrentPassDone() { - return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() - && mRetryBlocks.isEmpty(); - } - - @Override - public void initiateVerification() { - Preconditions.checkState(isCurrentPassDone(), "Previous pass is not finished"); - mFileIterator = Optional.empty(); - mTotalBlockCount.addAndGet(mCurrentBlockCount.get()); - mTotalFailureCount.addAndGet(mCurrentFailureCount.get()); - mCurrentBlockCount.set(0); - mCurrentFailureCount.set(0); - mState = JobState.VERIFYING; - } - - /** - * get next load task. 
- * - * @param worker blocker to worker - * @return the next task to run. If there is no task to run, return empty - */ - public Optional getNextTask(WorkerInfo worker) { - List blocks = getNextBatchBlocks(BATCH_SIZE); - if (blocks.isEmpty()) { - return Optional.empty(); - } - return Optional.of(new CopyTask(blocks)); - } - - /** - * Get next batch of blocks. - * @param count number of blocks - * @return list of blocks - */ - @VisibleForTesting - public List getNextBatchBlocks(int count) { - if (!mFileIterator.isPresent()) { - mFileIterator = Optional.of(mFileIterable.iterator()); - if (!mFileIterator - .get() - .hasNext()) { - return ImmutableList.of(); - } - mCurrentFile = mFileIterator.get().next(); - if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { - mProcessedFileCount.incrementAndGet(); - } - - mBlockIterator = mCurrentFile.getBlockIds().listIterator(); - } - ImmutableList.Builder batchBuilder = ImmutableList.builder(); - int i = 0; - // retry failed blocks if there's too many failed blocks otherwise wait until no more new block - if (mRetryBlocks.size() > RETRY_THRESHOLD - || (!mFileIterator.get().hasNext() && !mBlockIterator.hasNext())) { - while (i < count && !mRetryBlocks.isEmpty()) { - batchBuilder.add(requireNonNull(mRetryBlocks.removeFirst())); - i++; - } - } - for (; i < count; i++) { - if (!mBlockIterator.hasNext()) { - if (!mFileIterator.get().hasNext()) { - return batchBuilder.build(); - } - mCurrentFile = mFileIterator.get().next(); - if (!mFailedFiles.containsKey(mCurrentFile.getPath())) { - mProcessedFileCount.incrementAndGet(); - } - mBlockIterator = mCurrentFile.getBlockIds().listIterator(); - } - long blockId = mBlockIterator.next(); - BlockInfo blockInfo = mCurrentFile.getFileBlockInfo(blockId).getBlockInfo(); - if (blockInfo.getLocations().isEmpty()) { - batchBuilder.add(buildBlock(mCurrentFile, blockId)); - mCurrentBlockCount.incrementAndGet(); - // would be inaccurate when we initial verification, and we retry un-retryable blocks 
- mTotalByteCount.addAndGet(blockInfo.getLength()); - } - } - return batchBuilder.build(); - } - - /** - * Add a block to retry later. - * @param block the block that failed to load thus needing retry - * @return whether the block is successfully added - */ - @VisibleForTesting - public boolean addBlockToRetry(Block block) { - if (mRetryBlocks.size() >= RETRY_BLOCK_CAPACITY) { - return false; - } - LOG.debug("Retry block {}", block); - mRetryBlocks.add(block); - mCurrentFailureCount.incrementAndGet(); - JOB_LOAD_BLOCK_FAIL.inc(); - return true; - } - - /** - * Add a block to failure summary. - * - * @param block the block that failed to load and cannot be retried - * @param message failure message - * @param code status code for exception - */ - @VisibleForTesting - public void addBlockFailure(Block block, String message, int code) { - // When multiple blocks of the same file failed to load, from user's perspective, - // it's not hugely important what are the reasons for each specific failure, - // if they are different, so we will just keep the first one. 
- mFailedFiles.put(block.getUfsPath(), - format("Status code: %s, message: %s", code, message)); - mCurrentFailureCount.incrementAndGet(); - JOB_LOAD_BLOCK_FAIL.inc(); - } - - private static Block buildBlock(FileInfo fileInfo, long blockId) { - return Block.newBuilder().setBlockId(blockId) - .setLength(fileInfo.getFileBlockInfo(blockId).getBlockInfo().getLength()) - .setUfsPath(fileInfo.getUfsPath()) - .setMountId(fileInfo.getMountId()) - .setOffsetInFile(fileInfo.getFileBlockInfo(blockId).getOffset()) - .build(); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("Src", mSrc) - .add("Dst", mDst) - .add("User", mUser) - .add("Bandwidth", mBandwidth) - .add("UsePartialListing", mUsePartialListing) - .add("VerificationEnabled", mVerificationEnabled) - .add("RetryBlocks", mRetryBlocks) - .add("FailedFiles", mFailedFiles) - .add("StartTime", mStartTime) - .add("ProcessedFileCount", mProcessedFileCount) - .add("LoadedByteCount", mLoadedByteCount) - .add("TotalBlockCount", mTotalBlockCount) - .add("CurrentBlockCount", mCurrentBlockCount) - .add("TotalFailureCount", mTotalFailureCount) - .add("CurrentFailureCount", mCurrentFailureCount) - .add("State", mState) - .add("BatchSize", BATCH_SIZE) - .add("FailedReason", mFailedReason) - .add("FileIterator", mFileIterator) - .add("CurrentFile", mCurrentFile) - .add("BlockIterator", mBlockIterator) - .add("EndTime", mEndTime) - .toString(); - } - - @Override - public Journal.JournalEntry toJournalEntry() { - alluxio.proto.journal.Job.CopyJobEntry.Builder jobEntry = alluxio.proto.journal.Job.CopyJobEntry - .newBuilder() - .setSrc(mSrc) - .setDst(mDst) - .setState(JobState.toProto(mState)) - .setPartialListing(mUsePartialListing) - .setVerify(mVerificationEnabled) - .setJobId(mJobId); - mUser.ifPresent(jobEntry::setUser); - mBandwidth.ifPresent(jobEntry::setBandwidth); - mEndTime.ifPresent(jobEntry::setEndTime); - return Journal.JournalEntry - .newBuilder() - 
.setCopyJob(jobEntry.build()) - .build(); - } - - /** - * Get duration in seconds. - * @return job duration in seconds - */ - @VisibleForTesting - public long getDurationInSec() { - return (mEndTime.orElse(System.currentTimeMillis()) - mStartTime) / 1000; - } - - @Override - public boolean processResponse(CopyTask loadTask) { - try { - long totalBytes = loadTask.getBlocks().stream() - .map(Block::getLength) - .reduce(Long::sum) - .orElse(0L); - LoadResponse response = loadTask.getResponseFuture().get(); - if (response.getStatus() != TaskStatus.SUCCESS) { - LOG.debug(format("Get failure from worker: %s", response.getBlockStatusList())); - for (BlockStatus status : response.getBlockStatusList()) { - totalBytes -= status.getBlock().getLength(); - if (!isHealthy() || !status.getRetryable() || !addBlockToRetry( - status.getBlock())) { - addBlockFailure(status.getBlock(), status.getMessage(), status.getCode()); - } - } - } - addLoadedBytes(totalBytes); - JOB_LOAD_BLOCK_COUNT.inc( - loadTask.getBlocks().size() - response.getBlockStatusCount()); - JOB_LOAD_BLOCK_SIZE.inc(totalBytes); - JOB_LOAD_RATE.mark(totalBytes); - return response.getStatus() != TaskStatus.FAILURE; - } - catch (ExecutionException e) { - LOG.warn("exception when trying to get load response.", e.getCause()); - for (Block block : loadTask.getBlocks()) { - if (isHealthy()) { - addBlockToRetry(block); - } - else { - AlluxioRuntimeException exception = AlluxioRuntimeException.from(e.getCause()); - addBlockFailure(block, exception.getMessage(), exception.getStatus().getCode() - .value()); - } - } - return false; - } - catch (CancellationException e) { - LOG.warn("Task get canceled and will retry.", e); - loadTask.getBlocks().forEach(this::addBlockToRetry); - return true; - } - catch (InterruptedException e) { - loadTask.getBlocks().forEach(this::addBlockToRetry); - Thread.currentThread().interrupt(); - // We don't count InterruptedException as task failure - return true; - } - } - - @Override - public void 
updateJob(Job job) { - if (!(job instanceof CopyJob)) { - throw new IllegalArgumentException("Job is not a LoadJob: " + job); - } - CopyJob targetJob = (CopyJob) job; - updateBandwidth(targetJob.getBandwidth()); - setVerificationEnabled(targetJob.isVerificationEnabled()); - } - - /** - * Loads blocks in a UFS through an Alluxio worker. - */ - public class CopyTask extends Task { - - /** - * @return blocks to load - */ - public List getBlocks() { - return mBlocks; - } - - private final List mBlocks; - - /** - * Creates a new instance of {@link CopyTask}. - * - * @param blocks blocks to load - */ - public CopyTask(List blocks) { - mBlocks = blocks; - } - - @Override - public ListenableFuture run(BlockWorkerClient workerClient) { - LoadRequest.Builder request1 = LoadRequest - .newBuilder() - .addAllBlocks(mBlocks); - UfsReadOptions.Builder options = UfsReadOptions - .newBuilder() - .setTag(mJobId) - .setPositionShort(false); - if (mBandwidth.isPresent()) { - options.setBandwidth(mBandwidth.getAsLong()); - } - mUser.ifPresent(options::setUser); - LoadRequest request = request1 - .setOptions(options.build()) - .build(); - return workerClient.load(request); - } - } - - private static class LoadProgressReport { - private final boolean mVerbose; - private final JobState mJobState; - private final Long mBandwidth; - private final boolean mVerificationEnabled; - private final long mProcessedFileCount; - private final long mLoadedByteCount; - private final Long mTotalByteCount; - private final Long mThroughput; - private final double mFailurePercentage; - private final AlluxioRuntimeException mFailureReason; - private final long mFailedFileCount; - private final Map mFailedFilesWithReasons; - - public LoadProgressReport(CopyJob job, boolean verbose) - { - mVerbose = verbose; - mJobState = job.mState; - mBandwidth = job.mBandwidth.isPresent() ? 
job.mBandwidth.getAsLong() : null; - mVerificationEnabled = job.mVerificationEnabled; - mProcessedFileCount = job.mProcessedFileCount.get(); - mLoadedByteCount = job.mLoadedByteCount.get(); - if (!job.mUsePartialListing && job.mFileIterator.isPresent()) { - mTotalByteCount = job.mTotalByteCount.get(); - } - else { - mTotalByteCount = null; - } - long duration = job.getDurationInSec(); - if (duration > 0) { - mThroughput = job.mLoadedByteCount.get() / duration; - } - else { - mThroughput = null; - } - long blockCount = job.mTotalBlockCount.get() + job.mCurrentBlockCount.get(); - if (blockCount > 0) { - mFailurePercentage = - ((double) (job.mTotalFailureCount.get() + job.mCurrentFailureCount.get()) / blockCount) - * 100; - } - else { - mFailurePercentage = 0; - } - mFailureReason = job.mFailedReason.orElse(null); - mFailedFileCount = job.mFailedFiles.size(); - if (verbose && mFailedFileCount > 0) { - mFailedFilesWithReasons = job.mFailedFiles; - } else { - mFailedFilesWithReasons = null; - } - } - - public String getReport(JobProgressReportFormat format) - { - switch (format) { - case TEXT: - return getTextReport(); - case JSON: - return getJsonReport(); - default: - throw new InvalidArgumentRuntimeException( - format("Unknown load progress report format: %s", format)); - } - } - - private String getTextReport() { - StringBuilder progress = new StringBuilder(); - progress.append( - format("\tSettings:\tbandwidth: %s\tverify: %s%n", - mBandwidth == null ? "unlimited" : mBandwidth, - mVerificationEnabled)); - progress.append(format("\tJob State: %s%s%n", mJobState, - mFailureReason == null - ? 
"" : format( - " (%s: %s)", - mFailureReason.getClass().getName(), - mFailureReason.getMessage()))); - if (mVerbose && mFailureReason != null) { - for (StackTraceElement stack : mFailureReason.getStackTrace()) { - progress.append(format("\t\t%s%n", stack.toString())); - } - } - progress.append(format("\tFiles Processed: %d%n", mProcessedFileCount)); - progress.append(format("\tBytes Loaded: %s%s%n", - FormatUtils.getSizeFromBytes(mLoadedByteCount), - mTotalByteCount == null - ? "" : format(" out of %s", FormatUtils.getSizeFromBytes(mTotalByteCount)))); - if (mThroughput != null) { - progress.append(format("\tThroughput: %s/s%n", - FormatUtils.getSizeFromBytes(mThroughput))); - } - progress.append(format("\tBlock load failure rate: %.2f%%%n", mFailurePercentage)); - progress.append(format("\tFiles Failed: %s%n", mFailedFileCount)); - if (mVerbose && mFailedFilesWithReasons != null) { - mFailedFilesWithReasons.forEach((fileName, reason) -> - progress.append(format("\t\t%s: %s%n", fileName, reason))); - } - return progress.toString(); - } - - private String getJsonReport() { - try { - return new ObjectMapper() - .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY) - .setSerializationInclusion(JsonInclude.Include.NON_NULL) - .writeValueAsString(this); - } catch (JsonProcessingException e) { - throw new InternalRuntimeException("Failed to convert LoadProgressReport to JSON", e); - } - } - } - - // metrics - public static final Counter JOB_LOAD_SUCCESS = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_SUCCESS.getName()); - public static final Counter JOB_LOAD_FAIL = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_FAIL.getName()); - public static final Counter JOB_LOAD_BLOCK_COUNT = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_COUNT.getName()); - public static final Counter JOB_LOAD_BLOCK_FAIL = - MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_FAIL.getName()); - public static final Counter JOB_LOAD_BLOCK_SIZE = - 
MetricsSystem.counter(MetricKey.MASTER_JOB_LOAD_BLOCK_SIZE.getName()); - public static final Meter JOB_LOAD_RATE = - MetricsSystem.meter(MetricKey.MASTER_JOB_LOAD_RATE.getName()); -} diff --git a/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java deleted file mode 100644 index 483325797cdb..000000000000 --- a/core/server/master/src/main/java/alluxio/master/job/CopyJobFactory.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.job; - -import alluxio.grpc.CopyJobPOptions; -import alluxio.job.CopyJobRequest; -import alluxio.master.file.FileSystemMaster; -import alluxio.scheduler.job.Job; -import alluxio.scheduler.job.JobFactory; -import alluxio.security.User; -import alluxio.security.authentication.AuthenticatedClientUser; - -import java.util.Optional; -import java.util.OptionalLong; -import java.util.UUID; - -/** - * Factory for creating {@link LoadJob}s that get file infos from master. - */ -public class CopyJobFactory implements JobFactory { - - private final FileSystemMaster mFsMaster; - private final CopyJobRequest mRequest; - - /** - * Create factory. 
- * @param request load job request - * @param fsMaster file system master - */ - public CopyJobFactory(CopyJobRequest request, FileSystemMaster fsMaster) { - mFsMaster = fsMaster; - mRequest = request; - } - - @Override - public Job create() { - CopyJobPOptions options = mRequest.getOptions(); - String src = mRequest.getSrc(); - OptionalLong bandwidth = - options.hasBandwidth() ? OptionalLong.of(options.getBandwidth()) : OptionalLong.empty(); - boolean partialListing = options.hasPartialListing() && options.getPartialListing(); - boolean verificationEnabled = options.hasVerify() && options.getVerify(); - FileIterable fileIterator = new FileIterable(mFsMaster, src, Optional - .ofNullable(AuthenticatedClientUser.getOrNull()) - .map(User::getName), partialListing, - LoadJob.QUALIFIED_FILE_FILTER); - Optional user = Optional - .ofNullable(AuthenticatedClientUser.getOrNull()) - .map(User::getName); - return new CopyJob(src, mRequest.getDst(), user, UUID.randomUUID().toString(), - bandwidth, - partialListing, - verificationEnabled, fileIterator); - } -} - diff --git a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java index 6604925c650e..2146097f07fa 100644 --- a/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java +++ b/core/server/master/src/main/java/alluxio/master/job/JobFactoryProducer.java @@ -11,7 +11,6 @@ package alluxio.master.job; -import alluxio.job.CopyJobRequest; import alluxio.job.JobRequest; import alluxio.job.LoadJobRequest; import alluxio.master.file.FileSystemMaster; @@ -33,9 +32,6 @@ public static JobFactory create(JobRequest request, FileSystemMaster fsMaster) { if (request instanceof LoadJobRequest) { return new LoadJobFactory((LoadJobRequest) request, fsMaster); } - if (request instanceof CopyJobRequest) { - return new CopyJobFactory((CopyJobRequest) request, fsMaster); - } throw new IllegalArgumentException("Unknown job 
type: " + request.getType()); } @@ -48,9 +44,6 @@ public static JobFactory create(Journal.JournalEntry entry, FileSystemMaster fsM if (entry.hasLoadJob()) { return new JournalLoadJobFactory(entry.getLoadJob(), fsMaster); } - if (entry.hasCopyJob()) { - return new JournalCopyJobFactory(entry.getCopyJob(), fsMaster); - } else { throw new IllegalArgumentException("Unknown job type: " + entry); } diff --git a/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java b/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java deleted file mode 100644 index 24d365ecff8f..000000000000 --- a/core/server/master/src/main/java/alluxio/master/job/JournalCopyJobFactory.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.master.job; - -import alluxio.master.file.FileSystemMaster; -import alluxio.scheduler.job.Job; -import alluxio.scheduler.job.JobFactory; -import alluxio.scheduler.job.JobState; - -import java.util.Optional; -import java.util.OptionalLong; - -/** - * Factory for creating {@link LoadJob}s from journal entries. - */ -public class JournalCopyJobFactory implements JobFactory { - - private final FileSystemMaster mFsMaster; - - private final alluxio.proto.journal.Job.CopyJobEntry mJobEntry; - - /** - * Create factory. 
- * @param journalEntry journal entry - * @param fsMaster file system master - */ - public JournalCopyJobFactory(alluxio.proto.journal.Job.CopyJobEntry journalEntry, - FileSystemMaster fsMaster) { - mFsMaster = fsMaster; - mJobEntry = journalEntry; - } - - @Override - public Job create() { - Optional user = - mJobEntry.hasUser() ? Optional.of(mJobEntry.getUser()) : Optional.empty(); - FileIterable fileIterator = - new FileIterable(mFsMaster, mJobEntry.getSrc(), user, mJobEntry.getPartialListing(), - LoadJob.QUALIFIED_FILE_FILTER); - CopyJob job = new CopyJob(mJobEntry.getSrc(), mJobEntry.getDst(), user, mJobEntry.getJobId(), - mJobEntry.hasBandwidth() ? OptionalLong.of(mJobEntry.getBandwidth()) : OptionalLong.empty(), - mJobEntry.getPartialListing(), mJobEntry.getVerify(), fileIterator); - job.setJobState(JobState.fromProto(mJobEntry.getState())); - if (mJobEntry.hasEndTime()) { - job.setEndTime(mJobEntry.getEndTime()); - } - return job; - } -} - diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java index 854ca3a6241b..a7f8fb16dae9 100644 --- a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java @@ -76,7 +76,7 @@ * thus the state changing functions are not thread safe. 
*/ @NotThreadSafe -public class LoadJob implements Job { +public class LoadJob extends AbstractJob { private static final Logger LOG = LoggerFactory.getLogger(LoadJob.class); public static final String TYPE = "load"; private static final double FAILURE_RATIO_THRESHOLD = 0.05; @@ -89,7 +89,7 @@ public class LoadJob implements Job { && fileInfo.getInAlluxioPercentage() != 100; // Job configurations private final String mPath; - private final Optional mUser; + private OptionalLong mBandwidth; private boolean mUsePartialListing; private boolean mVerificationEnabled; @@ -97,7 +97,7 @@ public class LoadJob implements Job { // Job states private final LinkedList mRetryBlocks = new LinkedList<>(); private final Map mFailedFiles = new HashMap<>(); - private final long mStartTime; + private final AtomicLong mProcessedFileCount = new AtomicLong(); private final AtomicLong mLoadedByteCount = new AtomicLong(); private final AtomicLong mTotalByteCount = new AtomicLong(); @@ -105,14 +105,11 @@ public class LoadJob implements Job { private final AtomicLong mCurrentBlockCount = new AtomicLong(); private final AtomicLong mTotalFailureCount = new AtomicLong(); private final AtomicLong mCurrentFailureCount = new AtomicLong(); - private final String mJobId; - private JobState mState; private Optional mFailedReason = Optional.empty(); private final Iterable mFileIterable; private Optional> mFileIterator = Optional.empty(); private FileInfo mCurrentFile; private Iterator mBlockIterator = Collections.emptyIterator(); - private OptionalLong mEndTime = OptionalLong.empty(); /** * Constructor. 
@@ -144,17 +141,14 @@ public LoadJob( Optional user, String jobId, OptionalLong bandwidth, boolean usePartialListing, boolean verificationEnabled, FileIterable fileIterable) { + super(user, jobId); mPath = requireNonNull(path, "path is null"); - mUser = requireNonNull(user, "user is null"); - mJobId = requireNonNull(jobId, "jobId is null"); Preconditions.checkArgument( !bandwidth.isPresent() || bandwidth.getAsLong() > 0, format("bandwidth should be greater than 0 if provided, get %s", bandwidth)); mBandwidth = bandwidth; mUsePartialListing = usePartialListing; mVerificationEnabled = verificationEnabled; - mStartTime = System.currentTimeMillis(); - mState = JobState.RUNNING; mFileIterable = fileIterable; } @@ -166,33 +160,11 @@ public String getPath() { return mPath; } - /** - * Get user. - * @return user - */ - public Optional getUser() { - return mUser; - } - - @Override - public String getJobId() { - return mJobId; - } - @Override public JobDescription getDescription() { return JobDescription.newBuilder().setPath(mPath).setType(TYPE).build(); } - /** - * Get end time. - * @return end time - */ - @Override - public OptionalLong getEndTime() { - return mEndTime; - } - /** * Get bandwidth. * @return the allocated bandwidth @@ -201,14 +173,6 @@ public OptionalLong getBandwidth() { return mBandwidth; } - /** - * Update end time. - * @param time time in ms - */ - public void setEndTime(long time) { - mEndTime = OptionalLong.of(time); - } - /** * Update bandwidth. * @param bandwidth new bandwidth @@ -225,16 +189,6 @@ public boolean isVerificationEnabled() { return mVerificationEnabled; } - /** - * Is verification enabled. - * - * @return whether verification is enabled - */ - @Override - public boolean needVerification() { - return mVerificationEnabled && mCurrentBlockCount.get() > 0; - } - /** * Enable verification. 
* @param enableVerification whether to enable verification @@ -243,31 +197,6 @@ public void setVerificationEnabled(boolean enableVerification) { mVerificationEnabled = enableVerification; } - /** - * Get load status. - * @return the load job's status - */ - @Override - public JobState getJobState() { - return mState; - } - - /** - * Set load state. - * @param state new state - */ - @Override - public void setJobState(JobState state) { - LOG.debug("Change JobState to {} for job {}", state, this); - mState = state; - if (!isRunning()) { - mEndTime = OptionalLong.of(System.currentTimeMillis()); - } - if (state == JobState.SUCCEEDED) { - JOB_LOAD_SUCCESS.inc(); - } - } - /** * Set load state to FAILED with given reason. * @param reason failure exception @@ -326,16 +255,6 @@ public boolean isHealthy() { || (double) currentFailureCount / mCurrentBlockCount.get() <= FAILURE_RATIO_THRESHOLD; } - @Override - public boolean isRunning() { - return mState == JobState.RUNNING || mState == JobState.VERIFYING; - } - - @Override - public boolean isDone() { - return mState == JobState.SUCCEEDED || mState == JobState.FAILED; - } - @Override public boolean isCurrentPassDone() { return mFileIterator.isPresent() && !mFileIterator.get().hasNext() && !mBlockIterator.hasNext() @@ -359,6 +278,7 @@ public void initiateVerification() { * @param worker blocker to worker * @return the next task to run. If there is no task to run, return empty */ + @Override public Optional getNextTask(WorkerInfo worker) { List blocks = getNextBatchBlocks(BATCH_SIZE); if (blocks.isEmpty()) { @@ -581,6 +501,16 @@ public void updateJob(Job job) { setVerificationEnabled(targetJob.isVerificationEnabled()); } + /** + * Is verification enabled. + * + * @return whether verification is enabled + */ + @Override + public boolean needVerification() { + return mVerificationEnabled && mCurrentBlockCount.get() > 0; + } + /** * Loads blocks in a UFS through an Alluxio worker. 
*/ From 73b4d67c8b39a6358897ec349370e7037a6fea7f Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 17 Mar 2023 13:16:28 +0800 Subject: [PATCH 197/334] Add metric for cached block location Finish the todo task after https://github.com/Alluxio/alluxio/pull/16953 pr-link: Alluxio/alluxio#17056 change-id: cid-e6ddc03b08e7a8a18cd53d77fa9a2f0edf4e1f57 --- core/common/src/main/java/alluxio/metrics/MetricKey.java | 5 +++++ .../main/java/alluxio/util/proto/BlockLocationUtils.java | 9 ++++++++- .../java/alluxio/master/block/DefaultBlockMaster.java | 3 +++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index 477a8250da90..abd286028bb7 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -410,6 +410,11 @@ public static String getSyncMetricName(long mountId) { .setDescription("Total number of unique blocks in Alluxio") .setMetricType(MetricType.GAUGE) .build(); + public static final MetricKey MASTER_CACHED_BLOCK_LOCATIONS = + new Builder("Master.CachedBlockLocations") + .setDescription("Total number of cached block locations") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_TOTAL_RPCS = new Builder("Master.TotalRpcs") .setDescription("Throughput of master RPC calls. 
This metrics indicates how busy the" diff --git a/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java index d53ec25fda47..6105a26b9213 100644 --- a/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java +++ b/core/common/src/main/java/alluxio/util/proto/BlockLocationUtils.java @@ -30,7 +30,6 @@ public class BlockLocationUtils { private static final IndexDefinition WORKER_ID_INDEX = IndexDefinition.ofNonUnique(BlockLocation::getWorkerId); - // TODO(maobaolong): Add a metric to monitor the size of mLocationCacheMap private static final IndexedSet BLOCK_LOCATION_CACHE = new IndexedSet<>(OBJECT_INDEX, WORKER_ID_INDEX); @@ -84,4 +83,12 @@ public static BlockLocation getCached(BlockLocation blockLocation) { public static void evictByWorkerId(long workerId) { BLOCK_LOCATION_CACHE.removeByField(WORKER_ID_INDEX, workerId); } + + /** + * Gets the cached block location size. + * @return the cached block location size + */ + public static int getCachedBlockLocationSize() { + return BLOCK_LOCATION_CACHE.size(); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 02582147d329..0168f6449456 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -1872,6 +1872,9 @@ public static void registerGauges(final DefaultBlockMaster master) { master::getWorkerCount); MetricsSystem.registerGaugeIfAbsent(MetricKey.CLUSTER_LOST_WORKERS.getName(), master::getLostWorkerCount); + + MetricsSystem.registerGaugeIfAbsent(MetricKey.MASTER_CACHED_BLOCK_LOCATIONS.getName(), + BlockLocationUtils::getCachedBlockLocationSize); } private Metrics() {} // prevent instantiation From fd7350705e5f40e521dd025700341fc6746de573 Mon Sep 17 00:00:00 2001 From: yuyang 
wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:58:35 +0800 Subject: [PATCH 198/334] Fix property identity typo error ### What changes are proposed in this pull request? Fix a typo in the property. the matcher pattern is not aligned with the expected content. ### Why are the changes needed? Fix a typo in the property. the matcher pattern is not aligned with the expected content. pr-link: Alluxio/alluxio#17109 change-id: cid-a74af8d9d8734e8e10cec723914ad0d206fa03f6 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index fa645e2411a7..5fb87b648f90 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -9072,7 +9072,7 @@ public enum Template { PropertyType.STRING), UNDERFS_ABFS_ACCOUNT_KEY( "fs.azure.account.key.%s.dfs.core.windows.net", - "fs\\.azure\\.account\\.key\\.(\\w+)\\.dfs\\.core\\.window\\.net", + "fs\\.azure\\.account\\.key\\.(\\w+)\\.dfs\\.core\\.windows\\.net", PropertyCreators.fromBuilder(stringBuilder("fs.azure.account.key.%s.dfs.core.windows.net") .setDisplayType(DisplayType.CREDENTIALS))), UNDERFS_AZURE_ACCOUNT_KEY( From 7f335463bb1ef07186cbc302895fdb6631889e05 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Sun, 19 Mar 2023 20:18:58 +0800 Subject: [PATCH 199/334] Dump metrics + stacks on failover/crash ### What changes are proposed in this pull request? The master/worker processes will dump jstack/metrics when: 1. Process exits normally, from an error, or SIGTERM from `alluxio-stop.sh`. (Note that segfault is abrupt and the process doesn't have a chance to output these information.) 2. Primary master fails over to standby. The major changes in the code are: 1. Dumping those information into files 2. 
CollectInfo and CollectLog commands will capture those output files 3. Change the jstack dumping to async (https://github.com/Alluxio/alluxio/commit/9ae71902499b005852563055ae5135e97833e274 changed that to sync), so we don't block the process. The trade off is the output is not totally consistent. That should be acceptable in troubleshooting. This change depends on https://github.com/Alluxio/alluxio/commit/9ae71902499b005852563055ae5135e97833e274 ### Why are the changes needed? Sample output files: [alluxio-master-exit-stacks-20230315-063353.txt](https://github.com/Alluxio/alluxio/files/10980702/alluxio-master-exit-stacks-20230315-063353.txt) [alluxio-master-exit-metrics-20230315-063353.txt](https://github.com/Alluxio/alluxio/files/10980711/alluxio-master-exit-metrics-20230315-063353.txt) This helps persisting critical troubleshooting information because when the system unexpectedly exits or failover, the admin doesn't have a chance to manually poll these information before it's too late (process already died or primacy is lost). ### Does this PR introduce any user facing changes? 
See above pr-link: Alluxio/alluxio#17081 change-id: cid-5c9dcd5abb5c9731d3c20fe734d3f8d07f8fd167 --- .../main/java/alluxio/conf/PropertyKey.java | 18 +++ .../main/java/alluxio/util/ThreadUtils.java | 15 +- .../src/main/java/alluxio/ProcessUtils.java | 146 ++++++++++++++++++ .../java/alluxio/master/StateLockManager.java | 11 +- .../alluxio/metrics/sink/MetricsServlet.java | 8 +- .../alluxio/master/AlluxioMasterProcess.java | 32 +++- .../master/file/DefaultFileSystemMaster.java | 1 + .../bundler/command/CollectLogCommand.java | 4 +- 8 files changed, 216 insertions(+), 19 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 5fb87b648f90..5e42b86b1799 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -586,6 +586,14 @@ public String toString() { .setScope(Scope.SERVER) .setIsHidden(true) .build(); + public static final PropertyKey EXIT_COLLECT_INFO = + booleanBuilder(Name.EXIT_COLLECT_INFO) + .setDefaultValue(true) + .setDescription("If true, the process will dump metrics and jstack into the log folder. " + + "This only applies to Alluxio master and worker processes.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey GRPC_REFLECTION_ENABLED = booleanBuilder(Name.GRPC_REFLECTION_ENABLED) .setDefaultValue(false) @@ -2447,6 +2455,13 @@ public String toString() { + "the master addresses.") .setScope(Scope.ALL) .build(); + public static final PropertyKey MASTER_FAILOVER_COLLECT_INFO = + booleanBuilder(Name.MASTER_FAILOVER_COLLECT_INFO) + .setDefaultValue(true) + .setDescription("If true, the primary master will persist metrics and jstack into " + + "the log folder when it transitions to standby. 
") + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = booleanBuilder(Name.MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED) @@ -7520,6 +7535,7 @@ public static final class Name { public static final String CONF_VALIDATION_ENABLED = "alluxio.conf.validation.enabled"; public static final String DEBUG = "alluxio.debug"; public static final String EXTENSIONS_DIR = "alluxio.extensions.dir"; + public static final String EXIT_COLLECT_INFO = "alluxio.exit.collect.info"; public static final String GRPC_REFLECTION_ENABLED = "alluxio.grpc.reflection.enabled"; public static final String HOME = "alluxio.home"; @@ -7857,6 +7873,8 @@ public static final class Name { "alluxio.master.cluster.metrics.update.interval"; public static final String MASTER_CONTAINER_ID_RESERVATION_SIZE = "alluxio.master.container.id.reservation.size"; + public static final String MASTER_FAILOVER_COLLECT_INFO = + "alluxio.master.failover.collect.info"; public static final String MASTER_FILE_ACCESS_TIME_UPDATER_ENABLED = "alluxio.master.file.access.time.updater.enabled"; public static final String MASTER_FILE_ACCESS_TIME_JOURNAL_FLUSH_INTERVAL = diff --git a/core/common/src/main/java/alluxio/util/ThreadUtils.java b/core/common/src/main/java/alluxio/util/ThreadUtils.java index 3d2a253881ab..966835c818bc 100644 --- a/core/common/src/main/java/alluxio/util/ThreadUtils.java +++ b/core/common/src/main/java/alluxio/util/ThreadUtils.java @@ -116,15 +116,24 @@ private static String getTaskName(long id, String name) { /** * Prints the information and stack traces of all threads. + * In order not to pause the JVM when there are tons of threads, thread stacks are printed + * one by one. So the thread stacks are not guaranteed to be based on one consistent + * snapshot. 
* * @param stream the stream to * @param title a string title for the stack trace */ public static synchronized void printThreadInfo(PrintStream stream, String title) { stream.println("Process Thread Dump: " + title); - stream.println(THREAD_BEAN.getThreadCount() + " active theads"); - for (ThreadInfo ti: THREAD_BEAN.dumpAllThreads(true, true)) { - stream.print(ti.toString()); + stream.println(THREAD_BEAN.getThreadCount() + " active threads"); + long[] threadIds = THREAD_BEAN.getAllThreadIds(); + for (long id : threadIds) { + ThreadInfo info = THREAD_BEAN.getThreadInfo(id, Integer.MAX_VALUE); + if (info == null) { + // The thread is no longer active, ignore + continue; + } + stream.print(info.toString()); } stream.flush(); } diff --git a/core/server/common/src/main/java/alluxio/ProcessUtils.java b/core/server/common/src/main/java/alluxio/ProcessUtils.java index 802d6d77a986..4e9d13c464bb 100644 --- a/core/server/common/src/main/java/alluxio/ProcessUtils.java +++ b/core/server/common/src/main/java/alluxio/ProcessUtils.java @@ -11,19 +11,51 @@ package alluxio; +import static alluxio.metrics.sink.MetricsServlet.OBJECT_MAPPER; + import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.metrics.MetricsSystem; +import alluxio.util.CommonUtils; +import alluxio.util.ThreadUtils; import com.google.common.base.Throwables; +import com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.ExecutorService; 
+import java.util.concurrent.Future; + /** * Utility methods for Alluxio {@link Process}es. */ public final class ProcessUtils { private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); + public static final Set COLLECT_ON_EXIT = + ImmutableSet.of(CommonUtils.ProcessType.MASTER, CommonUtils.ProcessType.WORKER); + public static volatile boolean sInfoDumpOnExitCheck = false; + public static final DateTimeFormatter DATETIME_FORMAT = + DateTimeFormatter.ofLocalizedDateTime(FormatStyle.SHORT).ofPattern("yyyyMMdd-HHmmss") + .withLocale(Locale.getDefault()).withZone(ZoneId.systemDefault()); + /** * Runs the given {@link Process}. This method should only be called from {@code main()} methods. * @@ -36,6 +68,9 @@ public static void run(Process process) { LOG.info("Java version: {}", System.getProperty("java.version")); process.start(); LOG.info("Stopping {}.", process); + + dumpInformationOnExit(); + System.exit(0); } catch (Throwable t) { LOG.error("Uncaught exception while running {}, stopping it and exiting. " @@ -48,6 +83,8 @@ public static void run(Process process) { + "Exception \"{}\", Root Cause \"{}\"", process, t2, Throwables.getRootCause(t2), t2); } + dumpInformationOnExit(); + System.exit(-1); } } @@ -80,6 +117,9 @@ public static void fatalError(Logger logger, Throwable t, String format, Object. throw new RuntimeException(message); } logger.error(message); + + dumpInformationOnExit(); + System.exit(-1); } @@ -95,6 +135,7 @@ public static void fatalError(Logger logger, Throwable t, String format, Object. 
public static void stopProcessOnShutdown(final Process process) { Runtime.getRuntime().addShutdownHook(new Thread(() -> { try { + dumpInformationOnExit(); process.stop(); } catch (Throwable t) { LOG.error("Failed to stop process", t); @@ -102,5 +143,110 @@ public static void stopProcessOnShutdown(final Process process) { }, "alluxio-process-shutdown-hook")); } + /** + * Outputs process critical information like metrics and jstack before it exits. + * This is synchronous in order to capture as much information at the scene as possible. + * The information will be output to separate files in the log directory. + */ + public static void dumpInformationOnExit() { + if (!COLLECT_ON_EXIT.contains(CommonUtils.PROCESS_TYPE.get())) { + LOG.info("Process type is {}, skip dumping metrics and thread stacks", + CommonUtils.PROCESS_TYPE.get()); + return; + } + if (Configuration.getBoolean(PropertyKey.EXIT_COLLECT_INFO)) { + synchronized (ProcessUtils.class) { + if (!sInfoDumpOnExitCheck) { + sInfoDumpOnExitCheck = true; + LOG.info("Logging metrics and jstack on {} exit...", CommonUtils.PROCESS_TYPE.get()); + try { + String logsDir = Configuration.getString(PropertyKey.LOGS_DIR); + String outputFilePrefix = "alluxio-" + + CommonUtils.PROCESS_TYPE.get().toString().toLowerCase() + "-exit"; + dumpMetrics(logsDir, outputFilePrefix); + dumpStacks(logsDir, outputFilePrefix); + } catch (Throwable t) { + LOG.error("Failed to dump metrics and jstacks", t); + } + } + } + } else { + LOG.info("Not logging metrics and jstack on exit, set {}=true to enable this feature", + PropertyKey.EXIT_COLLECT_INFO.getName()); + } + } + + /** + * Outputs process critical information like metrics and jstack before the primary master + * fails over to standby. This is asynchronous in order not to block the failover. + * The information will be output to separate files in the log directory. 
+ * + * @param es the thread pool to submit tasks to + * @return a list of futures for async info dumping jobs + */ + public static List> dumpInformationOnFailover(ExecutorService es) { + if (Configuration.getBoolean(PropertyKey.MASTER_FAILOVER_COLLECT_INFO)) { + LOG.info("Logging metrics and jstack when primary master switches to standby..."); + String logsDir = Configuration.getString(PropertyKey.LOGS_DIR); + String outputFilePrefix = "alluxio-" + + CommonUtils.PROCESS_TYPE.get().toString().toLowerCase() + "-failover"; + List> futures = new ArrayList<>(); + // Attempt to dump metrics first before MetricsMaster clears all metrics + // The failover procedure will shutdown RPC -> Journal -> Master components + // So we rely on the first two steps take longer than this thread + futures.add(es.submit(() -> { + ProcessUtils.dumpMetrics(logsDir, outputFilePrefix); + return null; + })); + futures.add(es.submit(() -> { + ProcessUtils.dumpStacks(logsDir, outputFilePrefix); + return null; + })); + LOG.info("Started dumping metrics and jstacks into {}", logsDir); + return futures; + } else { + LOG.info("Not logging information like metrics and jstack on failover, " + + "set {}=true to enable this feature", + PropertyKey.MASTER_FAILOVER_COLLECT_INFO.getName()); + return Collections.emptyList(); + } + } + + private static void dumpMetrics(String logsDir, String outputFilePrefix) { + Instant start = Instant.now(); + String childFilePath = String.format("%s-metrics-%s.json", + outputFilePrefix, DATETIME_FORMAT.format(start)); + File metricDumpFile = new File(logsDir, childFilePath); + try (FileOutputStream fos = new FileOutputStream(metricDumpFile, false)) { + // The metrics json string is ~100KB in size + String outputContents = OBJECT_MAPPER.writerWithDefaultPrettyPrinter() + .writeValueAsString(MetricsSystem.METRIC_REGISTRY); + fos.getChannel().write(ByteBuffer.wrap(outputContents.getBytes(StandardCharsets.UTF_8))); + } catch (IOException e) { + LOG.error("Failed to persist 
metrics to {}", metricDumpFile.getAbsolutePath(), e); + return; + } + Instant end = Instant.now(); + LOG.info("Dumped metrics of current process in {}ms to {}", + Duration.between(start, end).toMillis(), childFilePath); + } + + private static void dumpStacks(String logsDir, String outputFilePrefix) { + Instant start = Instant.now(); + String childFilePath = String.format("%s-stacks-%s.txt", + outputFilePrefix, DATETIME_FORMAT.format(start)); + File stacksDumpFile = new File(logsDir, childFilePath); + try (PrintStream stream = new PrintStream(stacksDumpFile)) { + // Dumping one thread produces <1KB + ThreadUtils.printThreadInfo(stream, "Dumping all threads in process"); + } catch (IOException e) { + LOG.error("Failed to persist thread stacks to {}", stacksDumpFile.getAbsolutePath(), e); + return; + } + Instant end = Instant.now(); + LOG.info("Dumped jstack of current process in {}ms to {}", + Duration.between(start, end).toMillis(), childFilePath); + } + private ProcessUtils() {} // prevent instantiation } diff --git a/core/server/common/src/main/java/alluxio/master/StateLockManager.java b/core/server/common/src/main/java/alluxio/master/StateLockManager.java index 6c06f0af12c5..8c5be7096aff 100644 --- a/core/server/common/src/main/java/alluxio/master/StateLockManager.java +++ b/core/server/common/src/main/java/alluxio/master/StateLockManager.java @@ -176,15 +176,8 @@ public LockResource lockShared() throws InterruptedException { LOG.warn("Current thread is {}. All state lock holders are {}", threadName, mSharedLockHolders, e); } - try { - // Grab the lock interruptibly. - mStateLock.readLock().lockInterruptibly(); - } catch (Error e) { - // An Error is thrown when the lock is acquired 65536 times, log the jstack before exiting - LOG.error("Logging all thread stacks before exiting", e); - ThreadUtils.logAllThreads(); - throw e; - } + // Grab the lock interruptibly. + mStateLock.readLock().lockInterruptibly(); // Return the resource. 
// Register an action to remove the thread from holders registry before releasing the lock. return new LockResource(mStateLock.readLock(), false, false, () -> { diff --git a/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java b/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java index 6d67de1f4727..84298b54dce7 100644 --- a/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java +++ b/core/server/common/src/main/java/alluxio/metrics/sink/MetricsServlet.java @@ -32,9 +32,10 @@ @NotThreadSafe public class MetricsServlet implements Sink { public static final String SERVLET_PATH = "/metrics/json"; + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() + .registerModule(new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, false)); private MetricRegistry mMetricsRegistry; - private ObjectMapper mObjectMapper; /** * Creates a new {@link MetricsServlet} with a {@link Properties} and {@link MetricRegistry}. @@ -43,9 +44,6 @@ public class MetricsServlet implements Sink { */ public MetricsServlet(MetricRegistry registry) { mMetricsRegistry = registry; - mObjectMapper = - new ObjectMapper().registerModule(new MetricsModule(TimeUnit.SECONDS, - TimeUnit.MILLISECONDS, false)); } private HttpServlet createServlet() { @@ -58,7 +56,7 @@ protected void doGet(HttpServletRequest request, HttpServletResponse response) response.setContentType("application/json"); response.setStatus(HttpServletResponse.SC_OK); response.setHeader("Cache-Control", "no-cache, no-store, must-revalidate"); - String result = mObjectMapper.writerWithDefaultPrettyPrinter() + String result = OBJECT_MAPPER.writerWithDefaultPrettyPrinter() .writeValueAsString(mMetricsRegistry); response.getWriter().println(result); } diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java index 2e71e0ccc4fa..ec01e0a8abe6 100644 --- 
a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java @@ -14,6 +14,7 @@ import static alluxio.util.network.NetworkAddressUtils.ServiceType; import alluxio.AlluxioURI; +import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; @@ -44,6 +45,7 @@ import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.util.CommonUtils; import alluxio.util.CommonUtils.ProcessType; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.URIUtils; import alluxio.util.WaitForOptions; import alluxio.util.interfaces.Scoped; @@ -60,7 +62,13 @@ import java.io.IOException; import java.io.InputStream; import java.net.URI; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -244,9 +252,31 @@ public void start() throws Exception { if (!mRunning) { break; } + // Dump important information asynchronously + ExecutorService es = null; + List> dumpFutures = new ArrayList<>(); + try { + es = Executors.newFixedThreadPool( + 2, ThreadFactoryUtils.build("info-dumper-%d", true)); + dumpFutures.addAll(ProcessUtils.dumpInformationOnFailover(es)); + } catch (Throwable t) { + LOG.warn("Failed to dump metrics and jstacks before demotion", t); + } + // Shut down services like RPC, WebServer, Journal and all master components LOG.info("Losing the leadership."); mServices.forEach(SimpleService::demote); demote(); + // Block until information dump is done and close resources + for (Future f : dumpFutures) { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + 
LOG.warn("Failed to dump metrics and jstacks before demotion", e); + } + } + if (es != null) { + es.shutdownNow(); + } } } } @@ -277,6 +307,7 @@ private boolean promote() throws Exception { if (unstable.get()) { LOG.info("Terminating an unstable attempt to become a leader."); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_EXIT_ON_DEMOTION)) { + ProcessUtils.dumpInformationOnExit(); stop(); } else { demote(); @@ -302,7 +333,6 @@ private void demote() throws Exception { // sockets in stopServing so that clients don't see NPEs. mJournalSystem.losePrimacy(); stopMasterComponents(); - LOG.info("Primary stopped"); startMasterComponents(false); LOG.info("Standby started"); } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 4f6153136a6f..5b02062f6311 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1248,6 +1248,7 @@ private void listStatusInternal( if (context.donePartialListing()) { return; } + // The item should be listed if: // 1. We are not doing a partial listing, or have reached the start of the partial listing // (partialPath is empty) diff --git a/shell/src/main/java/alluxio/cli/bundler/command/CollectLogCommand.java b/shell/src/main/java/alluxio/cli/bundler/command/CollectLogCommand.java index dfc0d47ca297..a77d331e8bac 100644 --- a/shell/src/main/java/alluxio/cli/bundler/command/CollectLogCommand.java +++ b/shell/src/main/java/alluxio/cli/bundler/command/CollectLogCommand.java @@ -64,7 +64,9 @@ public class CollectLogCommand extends AbstractCollectInfoCommand { "proxy.out", "task.log", "task.out", - "user" + "user", + "alluxio-master", + "alluxio-worker" ).collect(Collectors.toSet()); // We tolerate the beginning of a log file to contain some rows that are not timestamped. 
// A YARN application log can have >20 rows in the beginning for From 57c078cf47f872db5c610270633b4bb3cd835c20 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Mon, 20 Mar 2023 10:46:59 +0800 Subject: [PATCH 200/334] Fix file system merge journal context ### What changes are proposed in this pull request? 1. do not use the FileSystemMergeJournalContext in list status operations as well as other operations that call the FileSystemMaster public createJournalContext() method, because some of these callers do not expect journals to be held in the context until it gets closed. 2. force flushing journals if there are too many journals in the FileSystemMergeJournalContext 3. better observability ### Why are the changes needed? A PR that addressed an inode operation consistency issue https://github.com/Alluxio/alluxio/pull/17071/files caused issues on the master side and made fuse unable to connect to it. https://github.com/Alluxio/alluxio/issues/17041 That PR introduced a FileSystemMergeJournalContext that keeps inode operation journals in memory and merges these journal entries on the fly. When the journal context closes, all of the journals will be appended to the journal writer. This journal context is supposed to be used in metadata write operations but it is also used unexpectedly in some other places: 1. ListStatus might also generate journal entries (e.g. update access time) 2. The journal context might also be used in other places outside the file system master (e.g. update access time/backup/checkpoint etc.) These places might generate an excessive number of journal entries that are all kept in memory and might cause OOM issues. So in the fix, we will just return a normal non-merging journal context instead. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#17071 change-id: cid-02ed825792b57fe6fb1b1b7fcb4566ff5caabb7d --- .../FileSystemMergeJournalContext.java | 13 +++++---- .../master/file/DefaultFileSystemMaster.java | 27 +++++++++++++++-- .../alluxio/master/file/InodeSyncStream.java | 29 +++++++++++-------- .../master/file/FileSystemMasterTest.java | 4 +-- 4 files changed, 50 insertions(+), 23 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java index 4a89953bf136..7989971b78bd 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java +++ b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java @@ -11,12 +11,10 @@ package alluxio.master.journal; -import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.status.UnavailableException; import alluxio.proto.journal.Journal.JournalEntry; -import alluxio.util.logging.SamplingLogger; import com.google.common.base.Preconditions; import org.slf4j.Logger; @@ -42,8 +40,7 @@ public class FileSystemMergeJournalContext implements JournalContext { = Configuration.getInt( PropertyKey.MASTER_MERGE_JOURNAL_CONTEXT_NUM_ENTRIES_LOGGING_THRESHOLD); - private static final Logger SAMPLING_LOG = new SamplingLogger( - LoggerFactory.getLogger(FileSystemMergeJournalContext.class), 30L * Constants.SECOND_MS); + private static final Logger LOG = LoggerFactory.getLogger(FileSystemMergeJournalContext.class); private final JournalContext mJournalContext; protected final JournalEntryMerger mJournalEntryMerger; @@ -77,8 +74,12 @@ public synchronized void append(JournalEntry entry) { mJournalEntryMerger.add(entry); List journalEntries = mJournalEntryMerger.getMergedJournalEntries(); if (journalEntries.size() >= MAX_LOGGING_ENTRIES) { - SAMPLING_LOG.warn("MergeJournalContext has 
" + journalEntries.size() - + " entries, over the limit of " + MAX_LOGGING_ENTRIES); + LOG.warn("MergeJournalContext has " + journalEntries.size() + + " entries, over the limit of " + MAX_LOGGING_ENTRIES + + ", forcefully merging journal entries and add them to the async journal writer" + + "\n Journal Entry: \n" + + entry, new Exception("MergeJournalContext Stacktrace:")); + appendMergedJournals(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 5b02062f6311..5e2eb91255dc 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -601,8 +601,21 @@ public Journaled getDelegate() { @Override public JournalContext createJournalContext() throws UnavailableException { + return createJournalContext(false); + } + + /** + * Creates a journal context. + * @param useMergeJournalContext if set to true, if possible, a journal context that merges + * journal entries and holds them until the context is closed. If set to false, + * a normal journal context will be returned. + * @return the journal context + */ + @VisibleForTesting + JournalContext createJournalContext(boolean useMergeJournalContext) + throws UnavailableException { JournalContext context = super.createJournalContext(); - if (!mMergeInodeJournals) { + if (!(mMergeInodeJournals && useMergeJournalContext)) { return context; } return new FileSystemMergeJournalContext( @@ -1081,7 +1094,9 @@ public void listStatus(AlluxioURI path, ListStatusContext context, Metrics.GET_FILE_INFO_OPS.inc(); LockingScheme lockingScheme = new LockingScheme(path, LockPattern.READ, false); boolean ufsAccessed = false; - try (RpcContext rpcContext = createRpcContext(context); + // List status might journal inode access time update journals. 
+ // We want these journals to be added to the async writer immediately instead of being merged. + try (RpcContext rpcContext = createNonMergingJournalRpcContext(context); FileSystemMasterAuditContext auditContext = createAuditContext("listStatus", path, null, null)) { @@ -5365,7 +5380,13 @@ public RpcContext createRpcContext() throws UnavailableException { @VisibleForTesting public RpcContext createRpcContext(OperationContext operationContext) throws UnavailableException { - return new RpcContext(createBlockDeletionContext(), createJournalContext(), + return new RpcContext(createBlockDeletionContext(), createJournalContext(true), + operationContext.withTracker(mStateLockCallTracker)); + } + + private RpcContext createNonMergingJournalRpcContext(OperationContext operationContext) + throws UnavailableException { + return new RpcContext(createBlockDeletionContext(), createJournalContext(false), operationContext.withTracker(mStateLockCallTracker)); } diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index 4d67ebd9f4e3..ff0a6bcf8ec6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -1229,16 +1229,21 @@ void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath, if (ufsLastModified != null) { createFileContext.setOperationTimeMs(ufsLastModified); } - + // If the journal context is a MetadataSyncMergeJournalContext, then the + // journals will be taken care and merged by that context already and hence + // there's no need to create a new MergeJournalContext. 
+ boolean shouldUseMetadataSyncMergeJournalContext = + mUseFileSystemMergeJournalContext + && rpcContext.getJournalContext() instanceof MetadataSyncMergeJournalContext; try (LockedInodePath writeLockedPath = inodePath.lockFinalEdgeWrite(); - JournalContext merger = mUseFileSystemMergeJournalContext + JournalContext merger = shouldUseMetadataSyncMergeJournalContext ? NoopJournalContext.INSTANCE : new MergeJournalContext(rpcContext.getJournalContext(), writeLockedPath.getUri(), InodeSyncStream::mergeCreateComplete) ) { // We do not want to close this wrapRpcContext because it uses elements from another context - RpcContext wrapRpcContext = mUseFileSystemMergeJournalContext + RpcContext wrapRpcContext = shouldUseMetadataSyncMergeJournalContext ? rpcContext : new RpcContext( rpcContext.getBlockDeletionContext(), merger, rpcContext.getOperationContext()); @@ -1386,16 +1391,16 @@ private void maybeFlushJournalToAsyncJournalWriter(RpcContext rpcContext) { protected RpcContext getMetadataSyncRpcContext() { JournalContext journalContext = mRpcContext.getJournalContext(); - if (!mUseFileSystemMergeJournalContext - || !(journalContext instanceof FileSystemMergeJournalContext)) { - return mRpcContext; + if (mUseFileSystemMergeJournalContext + && journalContext instanceof FileSystemMergeJournalContext) { + return new RpcContext( + mRpcContext.getBlockDeletionContext(), + new MetadataSyncMergeJournalContext( + ((FileSystemMergeJournalContext) journalContext).getUnderlyingJournalContext(), + new FileSystemJournalEntryMerger()), + mRpcContext.getOperationContext()); } - return new RpcContext( - mRpcContext.getBlockDeletionContext(), - new MetadataSyncMergeJournalContext( - ((FileSystemMergeJournalContext) journalContext).getUnderlyingJournalContext(), - new FileSystemJournalEntryMerger()), - mRpcContext.getOperationContext()); + return mRpcContext; } @Override diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java 
b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java index 3b767b278ed3..9c41577513a9 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTest.java @@ -1779,10 +1779,10 @@ public void writeToReadOnlyFileWhileCreating() throws Exception { @Test public void RecursiveDeleteForceFlushJournals() throws Exception { - FileSystemMaster fileSystemMasterWithSpy = spy(mFileSystemMaster); + DefaultFileSystemMaster fileSystemMasterWithSpy = spy(mFileSystemMaster); AtomicInteger flushCount = new AtomicInteger(); AtomicInteger closeCount = new AtomicInteger(); - when(fileSystemMasterWithSpy.createJournalContext()).thenReturn( + when(fileSystemMasterWithSpy.createJournalContext(true)).thenReturn( new JournalContext() { private int mNumLogs = 0; From a5cf2a23300aa94014d54fcb9be0c5cf68193d9a Mon Sep 17 00:00:00 2001 From: humengyu Date: Tue, 21 Mar 2023 01:36:54 +0800 Subject: [PATCH 201/334] Fix NPE for s3 proxy v2 ### What changes are proposed in this pull request? Fix NPE for S3 Proxy. ### Why are the changes needed? 
When we enable s3 v2 and authentication, a NullPointerException will occur: ``` alluxio.s3.rest.authentication.enabled=true alluxio.proxy.s3.v2.version.enabled=true ``` like: ``` 2023-03-14 22:54:51,738 ERROR S3Handler - Exception during create s3handler:alluxio.proxy.s3.S3Exception at alluxio.proxy.s3.signature.AwsSignatureProcessor.getAuthInfo(AwsSignatureProcessor.java:138) at alluxio.proxy.s3.S3RestUtils.getUserFromSignature(S3RestUtils.java:576) at alluxio.proxy.s3.S3RestUtils.getUser(S3RestUtils.java:562) at alluxio.proxy.s3.S3Handler.doAuthentication(S3Handler.java:458) at alluxio.proxy.s3.S3Handler.init(S3Handler.java:230) at alluxio.proxy.s3.S3Handler.createHandler(S3Handler.java:147) at alluxio.proxy.s3.S3RequestServlet.service(S3RequestServlet.java:73) at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799) at org.eclipse.jetty.servlet.ServletHandler$ChainEnd.doFilter(ServletHandler.java:1631) at alluxio.web.CORSFilter.doFilter(CORSFilter.java:54) at alluxio.web.HttpFilter.doFilter(HttpFilter.java:48) at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1601) at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:548) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235) at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188) at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501) at 
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186) at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:59) at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) at org.eclipse.jetty.server.Server.handle(Server.java:516) at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338) at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315) at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173) at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131) at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409) at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.NullPointerException at alluxio.proxy.s3.signature.StringToSignProducer.createSignatureBase(StringToSignProducer.java:78) at alluxio.proxy.s3.signature.AwsSignatureProcessor.getAuthInfo(AwsSignatureProcessor.java:118) ... 42 more ``` ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17086 change-id: cid-29fbb6da6bbd3ae22c9acb2ade964d90fc7efc77 --- .../s3/signature/AwsSignatureProcessor.java | 9 ++- .../s3/signature/StringToSignProducer.java | 55 +++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java index 21c3d4ad1786..3cb80a615828 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/AwsSignatureProcessor.java @@ -114,8 +114,13 @@ public AwsAuthInfo getAuthInfo() throws S3Exception { SignatureInfo signatureInfo = parseSignature(); String stringToSign = ""; if (signatureInfo.getVersion() == SignatureInfo.Version.V4) { - stringToSign = - StringToSignProducer.createSignatureBase(signatureInfo, mContext); + if (mContext != null) { + stringToSign = + StringToSignProducer.createSignatureBase(signatureInfo, mContext); + } else { + stringToSign = + StringToSignProducer.createSignatureBase(signatureInfo, mServletRequest); + } } String awsAccessId = signatureInfo.getAwsAccessId(); // ONLY validate aws access id when needed. 
diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java index ca5459418832..31f9c3ea171c 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/signature/StringToSignProducer.java @@ -36,11 +36,16 @@ import java.time.temporal.ChronoUnit; import java.util.ArrayList; import java.util.Collections; +import java.util.Enumeration; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.StringJoiner; +import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import javax.servlet.http.HttpServletRequest; import javax.ws.rs.container.ContainerRequestContext; /** @@ -83,6 +88,26 @@ public static String createSignatureBase( context.getUriInfo().getQueryParameters(), false)); } + /** + * Convert signature info to strToSign. + * + * @param signatureInfo + * @param request + * @return signature string + * @throws Exception + */ + public static String createSignatureBase( + SignatureInfo signatureInfo, + HttpServletRequest request + ) throws Exception { + return createSignatureBase(signatureInfo, + request.getScheme(), + request.getMethod(), + request.getRequestURI(), + getHeaders(request), + getParameterMap(request)); + } + /** * Convert request info to strToSign. * @@ -139,6 +164,36 @@ public static String createSignatureBase( return strToSign.toString(); } + /** + * Get all headers by given http request, and the result map will ignore case. 
+ * @param request + * @return + */ + private static Map getHeaders(HttpServletRequest request) { + Map result = new TreeMap<>(String::compareToIgnoreCase); + Enumeration headerNames = request.getHeaderNames(); + if (headerNames != null) { + while (headerNames.hasMoreElements()) { + String name = headerNames.nextElement(); + String value = request.getHeader(name); + result.put(name, value); + } + } + return result; + } + + /** + * Get all parameters by given http request, + * if there are multiple values for the same key, the first one will be taken. + * @param request + * @return + */ + private static Map getParameterMap(HttpServletRequest request) { + return request.getParameterMap().entrySet() + .stream() + .collect(Collectors.toMap(Entry::getKey, e -> e.getValue()[0])); + } + /** * Compute a hash for provided string. * @param payload From b7866e44c279b429cd570dd5a8adf28421adafa6 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Mon, 20 Mar 2023 11:39:40 -0700 Subject: [PATCH 202/334] Refactor collectMetrics command 1. We added more information to the response JSON string. Make the whole thing a JSON string for easier parsing. 2. Refactor the get metrics part for reusing. 
pr-link: Alluxio/alluxio#17111 change-id: cid-0675889969cfe77f069f84c2eb02ae1aa55a0b0b --- .../command/CollectMetricsCommand.java | 106 +++++++++++------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java b/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java index 1d2417086d7b..b6ded1c8003f 100644 --- a/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java +++ b/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java @@ -31,6 +31,7 @@ import java.io.StringWriter; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; /** @@ -79,12 +80,12 @@ public int run(CommandLine cl) throws AlluxioException, IOException { String masterMsg = String.format("Collecting master metrics at %s ", dtf.format(now)); LOG.info(masterMsg); outputBuffer.write(masterMsg); - masterMetrics(outputBuffer, i); + writeMasterMetrics(outputBuffer, i); if (!cl.hasOption(EXCLUDE_OPTION_NAME)) { String workerMsg = String.format("Collecting worker metrics at %s ", dtf.format(now)); LOG.info(workerMsg); outputBuffer.write(workerMsg); - workerMetrics(outputBuffer, i); + writeWorkerMetrics(outputBuffer, i); } // Wait for an interval SleepUtils.sleepMs(LOG, COLLECT_METRICS_INTERVAL); @@ -98,19 +99,54 @@ public int run(CommandLine cl) throws AlluxioException, IOException { return 0; } - private void masterMetrics(StringWriter outputBuffer, int i) throws IOException { + private void writeMasterMetrics(StringWriter outputBuffer, int i) throws IOException { + outputBuffer.write(masterMetrics(mFsContext)); + outputBuffer.write("\n"); + + // Write to file + File outputFile = generateOutputFile(mWorkingDirPath, + String.format("%s-master-%s", getCommandName(), i)); + FileUtils.writeStringToFile(outputFile, outputBuffer.toString()); + } + + private void writeWorkerMetrics(StringWriter 
outputBuffer, int i) throws IOException { + for (String metricsResponse: workerMetrics(mFsContext)) { + outputBuffer.write(metricsResponse); + outputBuffer.write("\n"); + } + // Write to file + File outputFile = generateOutputFile(mWorkingDirPath, + String.format("%s-worker-%s", getCommandName(), i)); + FileUtils.writeStringToFile(outputFile, outputBuffer.toString()); + } + + @Override + public String getUsage() { + return "collectMetrics "; + } + + @Override + public String getDescription() { + return "Collect Alluxio metrics"; + } + + /** + * Get master metrics. + * @param fsContext for connecting to master + * @return the string of master metrics in JSON format + */ + public static String masterMetrics(FileSystemContext fsContext) { // Generate URL from config properties String masterAddr; try { - masterAddr = mFsContext.getMasterAddress().getHostName(); + masterAddr = fsContext.getMasterAddress().getHostName(); } catch (UnavailableException e) { String noMasterMsg = "No Alluxio master available. Skip metrics collection."; LOG.warn(noMasterMsg); - outputBuffer.write(noMasterMsg); - return; + return noMasterMsg; } String url = String.format("http://%s:%s%s", masterAddr, - mFsContext.getClusterConf().get(PropertyKey.MASTER_WEB_PORT), + fsContext.getClusterConf().get(PropertyKey.MASTER_WEB_PORT), METRICS_SERVLET_PATH); LOG.info(String.format("Metric address URL: %s", url)); @@ -122,62 +158,46 @@ private void masterMetrics(StringWriter outputBuffer, int i) throws IOException // Do not break the loop since the HTTP failure can be due to many reasons // Return the error message instead LOG.error("Failed to get Alluxio master metrics from URL {}. 
Exception: ", url, e); - metricsResponse = String.format("Url: %s%nError: %s", url, e.getMessage()); + metricsResponse = String.format("{Url: \"%s\",%n\"Error\": %s}", url, e.getMessage()); } - outputBuffer.write(metricsResponse); - outputBuffer.write("\n"); - - // Write to file - File outputFile = generateOutputFile(mWorkingDirPath, - String.format("%s-master-%s", getCommandName(), i)); - FileUtils.writeStringToFile(outputFile, metricsResponse); + return metricsResponse; } - private void workerMetrics(StringWriter outputBuffer, int i) throws IOException { + /** + * Get metrics from each worker. + * @param fsContext for connecting to master + * @return a list of worker metrics in JSON format + * @throws IOException + */ + public static List workerMetrics(FileSystemContext fsContext) throws IOException { + List metricsResponses = new ArrayList<>(); // Generate URL from config properties List workers; try { - workers = mFsContext.getCachedWorkers(); + workers = fsContext.getCachedWorkers(); } catch (UnavailableException e) { String noWorkerMsg = "No Alluxio workers available. Skip metrics collection."; LOG.warn(noWorkerMsg); - outputBuffer.write(noWorkerMsg); - return; + metricsResponses.add(noWorkerMsg); + return metricsResponses; } for (BlockWorkerInfo worker : workers) { String url = String.format("http://%s:%s%s", worker.getNetAddress().getHost(), - mFsContext.getClusterConf().get(PropertyKey.WORKER_WEB_PORT), - METRICS_SERVLET_PATH); + fsContext.getClusterConf().get(PropertyKey.WORKER_WEB_PORT), + METRICS_SERVLET_PATH); LOG.info(String.format("Metric address URL: %s", url)); // Get metrics - String metricsResponse; try { - metricsResponse = getMetricsJson(url); + metricsResponses.add(getMetricsJson(url)); } catch (Exception e) { // Do not break the loop since the HTTP failure can be due to many reasons // Return the error message instead LOG.error("Failed to get Alluxio worker metrics from URL {}. 
Exception: ", url, e); - metricsResponse = String.format("Url: %s%nError: %s", url, e.getMessage()); + metricsResponses.add(String.format("{Url: \"%s\",%n\"Error\": %s}", url, e.getMessage())); } - outputBuffer.write(metricsResponse); - outputBuffer.write("\n"); - - // Write to file - File outputFile = generateOutputFile(mWorkingDirPath, - String.format("%s-worker-%s", getCommandName(), i)); - FileUtils.writeStringToFile(outputFile, metricsResponse); } - } - - @Override - public String getUsage() { - return "collectMetrics "; - } - - @Override - public String getDescription() { - return "Collect Alluxio metrics"; + return metricsResponses; } /** @@ -188,8 +208,8 @@ public String getDescription() { * @param url URL that serves Alluxio metrics * @return HTTP response in JSON string */ - public String getMetricsJson(String url) throws IOException { + public static String getMetricsJson(String url) throws IOException { String responseJson = HttpUtils.get(url, COLLECT_METRICS_TIMEOUT); - return String.format("Url: %s%nResponse: %s", url, responseJson); + return String.format("{Url: \"%s\",%n\"Response\": %s}", url, responseJson); } } From 1ad1cc97b523e03a8cc45df7350bff7ad725fc6e Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Mon, 20 Mar 2023 11:39:55 -0700 Subject: [PATCH 203/334] Bump CSI package version to fix security alert Bump go version and some package version in Alluxio CSI module to fix some security alert brought up by Github Dependabot some time ago. 
pr-link: Alluxio/alluxio#17117 change-id: cid-28ce431066394de6151d3ed33dba2eadc4f24adb --- integration/docker/Dockerfile | 2 +- integration/docker/csi/go.mod | 39 ++++++++++++++++++++++++++++++++--- integration/docker/csi/go.sum | 24 +++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/integration/docker/Dockerfile b/integration/docker/Dockerfile index 504f294e6407..6c7fd4246ec8 100644 --- a/integration/docker/Dockerfile +++ b/integration/docker/Dockerfile @@ -14,7 +14,7 @@ ARG JAVA_VERSION=8 # Setup CSI -FROM golang:1.15.13-alpine AS csi-dev +FROM golang:1.18-alpine3.17 AS csi-dev ENV GO111MODULE=on RUN mkdir -p /alluxio-csi COPY ./csi /alluxio-csi diff --git a/integration/docker/csi/go.mod b/integration/docker/csi/go.mod index f4940343b0af..3665db63467f 100644 --- a/integration/docker/csi/go.mod +++ b/integration/docker/csi/go.mod @@ -1,14 +1,47 @@ module github.com/Alluxio/integration/csi -go 1.15 +go 1.18 require ( github.com/container-storage-interface/spec v1.1.0 github.com/golang/glog v0.0.0-20210429001901-424d2337a529 - github.com/kubernetes-csi/csi-lib-utils v0.7.0 // indirect github.com/kubernetes-csi/drivers v1.0.2 + github.com/pkg/errors v0.8.1 github.com/spf13/cobra v1.1.3 - golang.org/x/net v0.0.0-20210510120150-4163338589ed + golang.org/x/net v0.7.0 google.golang.org/grpc v1.37.1 + k8s.io/api v0.17.0 + k8s.io/apimachinery v0.17.1-beta.0 + k8s.io/client-go v0.17.0 k8s.io/mount-utils v0.21.0 ) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v0.4.0 // indirect + github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d // indirect + github.com/golang/protobuf v1.4.2 // indirect + github.com/google/gofuzz v1.0.0 // indirect + github.com/googleapis/gnostic v0.2.0 // indirect + github.com/imdario/mergo v0.3.5 // indirect + github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/json-iterator/go v1.1.8 // indirect + github.com/kubernetes-csi/csi-lib-utils v0.7.0 // 
indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586 // indirect + golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect + golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect + google.golang.org/appengine v1.6.1 // indirect + google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect + google.golang.org/protobuf v1.25.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/klog v1.0.0 // indirect + k8s.io/klog/v2 v2.8.0 // indirect + k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect + sigs.k8s.io/yaml v1.1.0 // indirect +) diff --git a/integration/docker/csi/go.sum b/integration/docker/csi/go.sum index ad980d43f551..b707c8eaa8fe 100644 --- a/integration/docker/csi/go.sum +++ b/integration/docker/csi/go.sum @@ -80,6 +80,7 @@ github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dp github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v0.0.0-20210429001901-424d2337a529 h1:2voWjNECnrZRbfwXxHB1/j8wa6xdKn85B5NzgVL/pTU= @@ -110,6 +111,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ 
github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gofuzz v0.0.0-20161122191042-44d81051d367/go.mod h1:HP5RmnzzSNb993RKQDq4+1A4ia9nllfqcQFTQJedwGI= +github.com/google/gofuzz v1.0.0 h1:A8PeW59pxE9IoFRqBp37U+mSNaQoZ46F1f0f863XSXw= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -120,6 +122,7 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= +github.com/googleapis/gnostic v0.2.0 h1:l6N3VoaVzTncYYW+9yOz2LJJammFZGBO13sqgEhpy9g= github.com/googleapis/gnostic v0.2.0/go.mod h1:sJBsCZ4ayReDTBIg8b9dl28c5xFWyhBTVRp3pOg5EKY= github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEoIEcSTewFxm1c5g8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -149,12 +152,14 @@ github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0m github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q= github.com/imdario/mergo v0.3.5/go.mod 
h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.8 h1:QiWkFLKq0T7mpzwOTu6BzNDbfTE8OLrYhVKYMLF46Ok= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -188,9 +193,11 @@ github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0Qu github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= github.com/modern-go/reflect2 v1.0.1/go.mod 
h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= @@ -208,6 +215,7 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -272,6 +280,7 @@ golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586 h1:7KByu05hhLed2MO29w7p1XfZvZ13m8mub3shuVftRs0= golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -311,8 +320,11 @@ golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLL 
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210510120150-4163338589ed h1:p9UgmWI9wKpfYmgaV/IZKGdXc5qEK45tDwwwDyjS26I= golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -340,6 +352,8 @@ golang.org/x/sys v0.0.0-20191220220014-0732a990476f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -347,7 +361,10 
@@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 h1:SvFZT6jyqRaOeXpc5h/JSfZenJ2O330aBsf7JfSUXmQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -380,6 +397,7 @@ google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -419,6 +437,7 @@ gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8X gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -427,6 +446,7 @@ gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bl gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -435,9 +455,12 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +k8s.io/api v0.17.0 h1:H9d/lw+VkZKEVIUc8F3wgiQ+FUXTTr21M87jXLU7yqM= k8s.io/api v0.17.0/go.mod h1:npsyOePkeP0CPwyGfXDHxvypiYMJxBWAMpQxCaJ4ZxI= k8s.io/apimachinery v0.17.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= +k8s.io/apimachinery v0.17.1-beta.0 h1:0Wl/KpAiFOMe9to5h8x2Y6JnjV+BEWJiTcUk1Vx7zdE= k8s.io/apimachinery v0.17.1-beta.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg= +k8s.io/client-go 
v0.17.0 h1:8QOGvUGdqDMFrm9sD6IUFl256BcffynGoe80sxgTEDg= k8s.io/client-go v0.17.0/go.mod h1:TYgR6EUHs6k45hb6KWjVD6jFZvJV4gHDikv/It0xz+k= k8s.io/component-base v0.17.0/go.mod h1:rKuRAokNMY2nn2A6LP/MiwpoaMRHpfRnrPaUJJj1Yoc= k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= @@ -456,4 +479,5 @@ k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= +sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= From 86d720faad0d90e2b8da7447a279a55e4e64781b Mon Sep 17 00:00:00 2001 From: Jason Tieu <6509369+tieujason330@users.noreply.github.com> Date: Mon, 20 Mar 2023 13:39:35 -0700 Subject: [PATCH 204/334] [DOCFIX] Fix Azure Data Lake Gen2 doc broken link ### What changes are proposed in this pull request? See Title ### Why are the changes needed? Needed to make broken link clickable. ### Does this PR introduce any user facing changes? 
Yes, will update docs page pr-link: Alluxio/alluxio#17123 change-id: cid-3b53ece3119d975387b08446ef350dff2133d8c0 --- docs/en/ufs/Azure-Data-Lake-Gen2.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/ufs/Azure-Data-Lake-Gen2.md b/docs/en/ufs/Azure-Data-Lake-Gen2.md index eda965f32613..edfd25bc4ac2 100644 --- a/docs/en/ufs/Azure-Data-Lake-Gen2.md +++ b/docs/en/ufs/Azure-Data-Lake-Gen2.md @@ -20,7 +20,7 @@ or [download the precompiled binaries directly]({{ '/en/deploy/Running-Alluxio-L In preparation for using Azure Data Lake storage with Alluxio, [create a new Data Lake storage in your Azure account](https://docs.microsoft.com/en-in/azure/storage/blobs/create-data-lake-storage-account) or use an existing Data Lake storage. You should also note the directory you want to use, either by creating a new directory, or using an existing one. You also need a -[SharedKey](https://docs.microsoft.com/en-us/rest/api/storageservices/authorize-with-shared-key(. +[SharedKey](https://docs.microsoft.com/en-us/rest/api/storageservices/authorize-with-shared-key). For the purposes of this guide, the Azure storage account name is called ``, the directory in that storage account is called ``, and the name of the container is called ``. From d680e56180ce9a8fc67a878a4e23c62954ac337c Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Tue, 21 Mar 2023 10:29:58 +0800 Subject: [PATCH 205/334] Introduce S3 mock server to enhance unit testing ### What changes are proposed in this pull request? Introduce a s3 mock server and used it in unit testing ### Why are the changes needed? When we doing the new metadata sync redesign project, we need a reliable way to test the interactions with s3 UFS and help us deal with corner cases. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#17102 change-id: cid-d9724631504cdbc058311409a8f5eee6d4060c30 --- core/server/master/pom.xml | 11 ++ .../file/FileSystemMasterS3UfsTest.java | 115 +++++++++++++++ .../master/AbstractLocalAlluxioCluster.java | 6 +- pom.xml | 7 + tests/pom.xml | 11 ++ .../fs/FileSystemS3UfsIntegrationTest.java | 117 +++++++++++++++ underfs/pom.xml | 7 + .../s3a/S3AUnderFileSystemMockServerTest.java | 136 ++++++++++++++++++ 8 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java create mode 100644 tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java create mode 100644 underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java diff --git a/core/server/master/pom.xml b/core/server/master/pom.xml index 8fe392d7f5de..99305b7c0e4e 100644 --- a/core/server/master/pom.xml +++ b/core/server/master/pom.xml @@ -117,6 +117,11 @@ guava-testlib test + + io.findify + s3mock_2.13 + test + @@ -136,6 +141,12 @@ org.apache.httpcomponents httpclient + + org.alluxio + alluxio-underfs-s3a + ${project.version} + test + diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java new file mode 100644 index 000000000000..b54ce176585b --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java @@ -0,0 +1,115 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.MountContext; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import io.findify.s3mock.S3Mock; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Unit tests for {@link FileSystemMaster}. 
+ */ +public final class FileSystemMasterS3UfsTest extends FileSystemMasterTestBase { + private static final Logger LOG = LoggerFactory.getLogger(FileSystemMasterS3UfsTest.class); + private static final String TEST_BUCKET = "test-bucket"; + private static final String TEST_FILE = "test_file"; + private static final String TEST_DIRECTORY = "test_directory"; + private static final String TEST_CONTENT = "test_content"; + private static final AlluxioURI UFS_ROOT = new AlluxioURI("s3://test-bucket/"); + private static final AlluxioURI MOUNT_POINT = new AlluxioURI("/s3_mount"); + private AmazonS3 mS3Client; + private S3Mock mS3MockServer; + + @Override + public void before() throws Exception { + mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + mS3MockServer.start(); + + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT, "localhost:8001"); + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2"); + Configuration.set(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true); + Configuration.set(PropertyKey.S3A_ACCESS_KEY, "_"); + Configuration.set(PropertyKey.S3A_SECRET_KEY, "_"); + + AwsClientBuilder.EndpointConfiguration + endpoint = new AwsClientBuilder.EndpointConfiguration( + "http://localhost:8001", "us-west-2"); + mS3Client = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + mS3Client.createBucket(TEST_BUCKET); + + super.before(); + } + + @Test + public void basicWrite() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mFileSystemMaster.createDirectory( + MOUNT_POINT.join(TEST_DIRECTORY), + CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH) + ); + assertEquals(1, 
mS3Client.listObjects(TEST_BUCKET).getObjectSummaries().size()); + assertNotNull(mS3Client.getObject(TEST_BUCKET, TEST_DIRECTORY + "/")); + } + + @Test + public void basicSync() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_FILE), ExistsContext.defaults())); + } + + @Override + public void after() throws Exception { + mS3Client = null; + try { + if (mS3MockServer != null) { + mS3MockServer.shutdown(); + } + } finally { + mS3MockServer = null; + } + super.after(); + } +} diff --git a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java index e37692de1e19..ea475dc909d2 100644 --- a/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java +++ b/minicluster/src/main/java/alluxio/master/AbstractLocalAlluxioCluster.java @@ -224,7 +224,11 @@ protected void setupTest() throws IOException { String underfsAddress = Configuration.getString(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS); // Deletes the ufs dir for this test from to avoid permission problems - UnderFileSystemUtils.deleteDirIfExists(ufs, underfsAddress); + // Do not delete the ufs root if the ufs is an object storage. + // In test environment, this means s3 mock is used. + if (!ufs.isObjectStorage()) { + UnderFileSystemUtils.deleteDirIfExists(ufs, underfsAddress); + } // Creates ufs dir. 
This must be called before starting UFS with UnderFileSystemCluster.create() UnderFileSystemUtils.mkdirIfNotExists(ufs, underfsAddress); diff --git a/pom.xml b/pom.xml index e2f332c1d115..b0d98b1e8371 100644 --- a/pom.xml +++ b/pom.xml @@ -171,6 +171,7 @@ 0.5.5 1.0.1 8.5.9 + 0.2.6 @@ -717,6 +718,12 @@ ${guava.version} test + + io.findify + s3mock_2.13 + ${s3mock.version} + test + io.grpc grpc-testing diff --git a/tests/pom.xml b/tests/pom.xml index aefc0f6016e8..5e9b20917958 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -55,6 +55,11 @@ commons-io commons-io + + io.findify + s3mock_2.13 + test + org.hamcrest hamcrest @@ -217,6 +222,12 @@ alluxio-integration-fuse ${project.version} + + org.alluxio + alluxio-underfs-s3a + ${project.version} + test + diff --git a/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java new file mode 100644 index 000000000000..bb1af5ab85d9 --- /dev/null +++ b/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java @@ -0,0 +1,117 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.fs; + +import static org.junit.Assert.assertEquals; + +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; +import alluxio.conf.PropertyKey; +import alluxio.exception.AlluxioException; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.WritePType; +import alluxio.testutils.BaseIntegrationTest; +import alluxio.testutils.LocalAlluxioClusterResource; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.S3Object; +import io.findify.s3mock.S3Mock; +import org.apache.commons.io.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +public class FileSystemS3UfsIntegrationTest extends BaseIntegrationTest { + private static final String TEST_CONTENT = "TestContents"; + private static final String TEST_FILE = "test_file"; + private static final int USER_QUOTA_UNIT_BYTES = 1000; + @Rule + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = + new LocalAlluxioClusterResource.Builder() + .setProperty(PropertyKey.USER_FILE_BUFFER_BYTES, USER_QUOTA_UNIT_BYTES) + .setProperty(PropertyKey.UNDERFS_S3_ENDPOINT, "localhost:8001") + .setProperty(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2") + .setProperty(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true) + .setProperty(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, "s3://" + TEST_BUCKET) + .setProperty(PropertyKey.S3A_ACCESS_KEY, "_") + .setProperty(PropertyKey.S3A_SECRET_KEY, "_") + .setStartCluster(false) + .build(); + private FileSystem mFileSystem = null; + 
private AmazonS3 mS3Client = null; + @Rule + public ExpectedException mThrown = ExpectedException.none(); + private S3Mock mS3MockServer; + private static final String TEST_BUCKET = "test-bucket"; + + @Before + public void before() throws Exception { + mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + mS3MockServer.start(); + AwsClientBuilder.EndpointConfiguration + endpoint = new AwsClientBuilder.EndpointConfiguration( + "http://localhost:8001", "us-west-2"); + mS3Client = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + mS3Client.createBucket(TEST_BUCKET); + + mLocalAlluxioClusterResource.start(); + mFileSystem = mLocalAlluxioClusterResource.get().getClient(); + } + + @After + public void after() { + mS3Client = null; + try { + if (mS3MockServer != null) { + mS3MockServer.shutdown(); + } + } finally { + mS3MockServer = null; + } + } + + @Test + public void basicMetadataSync() throws IOException, AlluxioException { + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + FileInStream fis = mFileSystem.openFile(new AlluxioURI("/" + TEST_FILE)); + assertEquals(TEST_CONTENT, IOUtils.toString(fis, StandardCharsets.UTF_8)); + } + + @Test + public void basicWriteThrough() throws IOException, AlluxioException { + FileOutStream fos = mFileSystem.createFile( + new AlluxioURI("/" + TEST_FILE), + CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH).build()); + fos.write(TEST_CONTENT.getBytes()); + fos.close(); + try (S3Object s3Object = mS3Client.getObject(TEST_BUCKET, TEST_FILE)) { + assertEquals( + TEST_CONTENT, IOUtils.toString(s3Object.getObjectContent(), StandardCharsets.UTF_8)); + } + } +} diff --git a/underfs/pom.xml b/underfs/pom.xml index f2cbb590d060..a1c8e26f2995 100755 --- a/underfs/pom.xml +++ b/underfs/pom.xml @@ -76,6 +76,13 @@ log4j-slf4j-impl 
provided + + + + io.findify + s3mock_2.13 + test + diff --git a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java new file mode 100644 index 000000000000..6df76561cb23 --- /dev/null +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java @@ -0,0 +1,136 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.s3a; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import alluxio.AlluxioURI; +import alluxio.conf.Configuration; +import alluxio.conf.InstancedConfiguration; +import alluxio.underfs.UfsStatus; +import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.underfs.options.ListOptions; + +import com.amazonaws.AmazonClientException; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.transfer.TransferManager; +import io.findify.s3mock.S3Mock; +import org.apache.commons.io.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; 
+import java.util.concurrent.Executors; + +/** + * Unit tests for the {@link S3AUnderFileSystem} using an s3 mock server. + */ +public class S3AUnderFileSystemMockServerTest { + private static final InstancedConfiguration CONF = Configuration.copyGlobal(); + + private static final String TEST_BUCKET = "test-bucket"; + private static final String TEST_FILE = "test_file"; + private static final AlluxioURI TEST_FILE_URI = new AlluxioURI("s3://test-bucket/test_file"); + private static final String TEST_CONTENT = "test_content"; + + private S3AUnderFileSystem mS3UnderFileSystem; + private AmazonS3 mClient; + + private S3Mock mS3MockServer; + + @Rule + public final ExpectedException mThrown = ExpectedException.none(); + + @Before + public void before() throws AmazonClientException { + mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + mS3MockServer.start(); + + AwsClientBuilder.EndpointConfiguration + endpoint = new AwsClientBuilder.EndpointConfiguration( + "http://localhost:8001", "us-west-2"); + mClient = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + mClient.createBucket(TEST_BUCKET); + mS3UnderFileSystem = + new S3AUnderFileSystem(new AlluxioURI("s3://" + TEST_BUCKET), mClient, TEST_BUCKET, + Executors.newSingleThreadExecutor(), new TransferManager(), + UnderFileSystemConfiguration.defaults(CONF), false); + } + + @After + public void after() { + mClient = null; + try { + if (mS3MockServer != null) { + mS3MockServer.shutdown(); + } + } finally { + mS3MockServer = null; + } + } + + @Test + public void read() throws IOException { + mClient.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + InputStream is = + mS3UnderFileSystem.open(TEST_FILE_URI.getPath()); + assertEquals(TEST_CONTENT, IOUtils.toString(is, StandardCharsets.UTF_8)); + } + + @Test + public void listRecursive() 
throws IOException { + mClient.putObject(TEST_BUCKET, "d1/d1/f1", TEST_CONTENT); + mClient.putObject(TEST_BUCKET, "d1/d1/f2", TEST_CONTENT); + mClient.putObject(TEST_BUCKET, "d1/d2/f1", TEST_CONTENT); + mClient.putObject(TEST_BUCKET, "d2/d1/f1", TEST_CONTENT); + mClient.putObject(TEST_BUCKET, "d3/", ""); + mClient.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mClient.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + + UfsStatus[] ufsStatuses = mS3UnderFileSystem.listStatus( + "/", ListOptions.defaults().setRecursive(true)); + + /* + Objects: + d1/ + d1/d1/ + d1/d1/f1 + d1/d1/f2 + d1/d2/ + d1/d2/f1 + d2/ + d2/d1/ + d2/d1/f1 + d3/ + f1 + f2 + */ + assertNotNull(ufsStatuses); + assertEquals(12, ufsStatuses.length); + } +} From 9532ce03c88d18d7e132bc25c74dcb6d6b357b49 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:51:50 -0700 Subject: [PATCH 206/334] Fix collectMetrics command in k8s In container env worker address is stored in the field `mContainerHost`, not in `mHost`. `getHost()` will get the host machine hostname, which is not what we want. 
pr-link: Alluxio/alluxio#17127 change-id: cid-7f3b74c01c01eb988b2e61e51579d4765e805bb3 --- .../alluxio/cli/bundler/command/CollectMetricsCommand.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java b/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java index b6ded1c8003f..4a6b427c2a55 100644 --- a/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java +++ b/shell/src/main/java/alluxio/cli/bundler/command/CollectMetricsCommand.java @@ -182,7 +182,9 @@ public static List workerMetrics(FileSystemContext fsContext) throws IOE return metricsResponses; } for (BlockWorkerInfo worker : workers) { - String url = String.format("http://%s:%s%s", worker.getNetAddress().getHost(), + String workerAddress = worker.getNetAddress().getContainerHost().equals("") + ? worker.getNetAddress().getHost() : worker.getNetAddress().getContainerHost(); + String url = String.format("http://%s:%s%s", workerAddress, fsContext.getClusterConf().get(PropertyKey.WORKER_WEB_PORT), METRICS_SERVLET_PATH); LOG.info(String.format("Metric address URL: %s", url)); From 65ed1bba309d0af25251ae2554757518d65d59fc Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 22 Mar 2023 07:11:24 +0800 Subject: [PATCH 207/334] Make light and heavy thread pool configurable for s3 proxy v2 ### What changes are proposed in this pull request? Add properties to config s3 proxy v2 thread pool. for light pool: 1. `alluxio.proxy.s3.v2.async.light.pool.core.thread.number` ; 2. `alluxio.proxy.s3.v2.async.light.pool.maximum.thread.number`; 3. `alluxio.proxy.s3.v2.async.light.pool.queue.size`. for heavy pool: 1. `alluxio.proxy.s3.v2.async.heavy.pool.core.thread.number` ; 2. `alluxio.proxy.s3.v2.async.heavy.pool.maximum.thread.number`; 3. `alluxio.proxy.s3.v2.async.heavy.pool.queue.size`. ### Why are the changes needed? We config `alluxio.web.threads` as 1000, because our OPS has reached 2000. 
The default value of thread number is not big enough. pr-link: Alluxio/alluxio#17082 change-id: cid-a89b016822a511619d23971118336a34cdf39328 --- .../main/java/alluxio/conf/PropertyKey.java | 54 ++++++++++++++++++ .../main/java/alluxio/web/ProxyWebServer.java | 56 ++++++++++++++++--- 2 files changed, 101 insertions(+), 9 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 5e42b86b1799..de625b63dc94 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5338,6 +5338,48 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER) + .setDefaultValue(8) + .setDescription("Core thread number for async light thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER) + .setDefaultValue(64) + .setDescription("Maximum thread number for async light thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE = + intBuilder(Name.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE) + .setDefaultValue(64 * 1024) + .setDescription("Queue size for async light thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER) + .setDefaultValue(8) + .setDescription("Core thread number for async 
heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER) + .setDefaultValue(64) + .setDescription("Maximum thread number for async heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE = + intBuilder(Name.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE) + .setDefaultValue(64 * 1024) + .setDescription("Queue size for async heavy thread pool.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey PROXY_STREAM_CACHE_TIMEOUT_MS = durationBuilder(Name.PROXY_STREAM_CACHE_TIMEOUT_MS) .setAlias("alluxio.proxy.stream.cache.timeout.ms") @@ -8538,6 +8580,18 @@ public static final class Name { "alluxio.proxy.s3.v2.version.enabled"; public static final String PROXY_S3_V2_ASYNC_PROCESSING_ENABLED = "alluxio.proxy.s3.v2.async.processing.enabled"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.light.pool.core.thread.number"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.light.pool.maximum.thread.number"; + public static final String PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE = + "alluxio.proxy.s3.v2.async.light.pool.queue.size"; + public static final String PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.heavy.pool.core.thread.number"; + public static final String PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER = + "alluxio.proxy.s3.v2.async.heavy.pool.maximum.thread.number"; + public static final String PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE = + "alluxio.proxy.s3.v2.async.heavy.pool.queue.size"; public static 
final String S3_UPLOADS_ID_XATTR_KEY = "s3_uploads_mulitpartupload_id"; public static final String PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = "alluxio.proxy.s3.bucketpathcache.timeout"; diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index 7aac4bca16ac..8f82a949eccc 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -29,6 +29,7 @@ import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; +import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.util.concurrent.RateLimiter; import org.eclipse.jetty.server.HttpChannel; @@ -165,15 +166,8 @@ public void init() throws ServletException { new StreamCache(Configuration.getMs(PropertyKey.PROXY_STREAM_CACHE_TIMEOUT_MS))); getServletContext().setAttribute(ALLUXIO_PROXY_AUDIT_LOG_WRITER_KEY, mAsyncAuditLogWriter); - - getServletContext().setAttribute(PROXY_S3_V2_LIGHT_POOL, - new ThreadPoolExecutor(8, 64, 0, - TimeUnit.SECONDS, new ArrayBlockingQueue<>(64 * 1024), - ThreadFactoryUtils.build("S3-LIGHTPOOL-%d", false))); - getServletContext().setAttribute(PROXY_S3_V2_HEAVY_POOL, - new ThreadPoolExecutor(8, 64, 0, - TimeUnit.SECONDS, new ArrayBlockingQueue<>(64 * 1024), - ThreadFactoryUtils.build("S3-HEAVYPOOL-%d", false))); + getServletContext().setAttribute(PROXY_S3_V2_LIGHT_POOL, createLightThreadPool()); + getServletContext().setAttribute(PROXY_S3_V2_HEAVY_POOL, createHeavyThreadPool()); getServletContext().setAttribute(PROXY_S3_HANDLER_MAP, mS3HandlerMap); } }); @@ -187,6 +181,50 @@ public void init() throws ServletException { .addServlet(rsServletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); } + private ThreadPoolExecutor createLightThreadPool() { + int lightCorePoolSize = Configuration.getInt( + 
PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER); + Preconditions.checkArgument(lightCorePoolSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER.getName() + + " must be a positive integer."); + int lightMaximumPoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER); + Preconditions.checkArgument(lightMaximumPoolSize >= lightCorePoolSize, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_MAXIMUM_THREAD_NUMBER.getName() + + " must be greater than or equal to the value of " + + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_CORE_THREAD_NUMBER.getName()); + int lightPoolQueueSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE); + Preconditions.checkArgument(lightPoolQueueSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_LIGHT_POOL_QUEUE_SIZE.getName() + + " must be a positive integer."); + return new ThreadPoolExecutor(lightCorePoolSize, lightMaximumPoolSize, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(lightPoolQueueSize), + ThreadFactoryUtils.build("S3-LIGHTPOOL-%d", false)); + } + + private ThreadPoolExecutor createHeavyThreadPool() { + int heavyCorePoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER); + Preconditions.checkArgument(heavyCorePoolSize > 0, + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER.getName() + + " must be a positive integer."); + int heavyMaximumPoolSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER); + Preconditions.checkArgument(heavyMaximumPoolSize >= heavyCorePoolSize, + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_MAXIMUM_THREAD_NUMBER.getName() + + " must be greater than or equal to the value of " + + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_CORE_THREAD_NUMBER.getName()); + int heavyPoolQueueSize = Configuration.getInt( + PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE); + Preconditions.checkArgument(heavyPoolQueueSize > 0, + 
PropertyKey.PROXY_S3_V2_ASYNC_HEAVY_POOL_QUEUE_SIZE.getName() + + " must be a positive integer."); + return new ThreadPoolExecutor(heavyCorePoolSize, heavyMaximumPoolSize, 0, + TimeUnit.SECONDS, new ArrayBlockingQueue<>(heavyPoolQueueSize), + ThreadFactoryUtils.build("S3-HEAVYPOOL-%d", false)); + } + @Override public void stop() throws Exception { if (mAsyncAuditLogWriter != null) { From 9c1f0006ad38f8f6be2658fd88eff5de4e8530b3 Mon Sep 17 00:00:00 2001 From: Jiaming Mai Date: Wed, 22 Mar 2023 15:23:16 +0800 Subject: [PATCH 208/334] Fix dependencies conflicts thrown by trino **PR Background - Bug description:** [PrestoDB](https://prestodb.io/) and [TrinoDB](https://trino.io/) use our shaded JAR in their `pom.xml`. When upgrading the version of `alluxio-shaded-client` from `2.8.1` to `2.9.1`, we fail to compile the codes of presto/trino. This is because there is dependency conflict. **Solution:** Relocate the dependencies in `shaded/client/pom.xml` to avoid deplicate classes and files. **TIPS:** You can execute `cd ${ALLUXIO_HOME}/shaded && mvn clean install -Prelease -DskipTests -T 4C` to package the shaded JAR. pr-link: Alluxio/alluxio#17129 change-id: cid-1c9e013c98369ebb206b5c5d52966780ee34944c --- shaded/client/pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shaded/client/pom.xml b/shaded/client/pom.xml index f9137bb8413f..346044c25acc 100644 --- a/shaded/client/pom.xml +++ b/shaded/client/pom.xml @@ -293,6 +293,10 @@ org/rocksdb/**/* + + software/amazon/ion/ + ${shading.prefix}.software.amazon.ion + From 595a55a194fe2d6bc6b8e94043ab64da5fd7ac1c Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Wed, 22 Mar 2023 13:53:18 -0700 Subject: [PATCH 209/334] Add more details in Log4j default settings ### What changes are proposed in this pull request? Add filename:linenumber:threadid details into default log4j settings so logs could be more informative when doing debugging. ### Why are the changes needed? 
During debugging, it is hard to trace events for one particular file/event/thread with current default log4j settings ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#17072 change-id: cid-419cb5bc0148b4d34ece0b0325e45f8fae2ba9a8 --- conf/log4j.properties | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/conf/log4j.properties b/conf/log4j.properties index ff3714669160..5140e0e62f7a 100644 --- a/conf/log4j.properties +++ b/conf/log4j.properties @@ -31,7 +31,7 @@ log4j.appender.=org.apache.log4j.varia.NullAppender log4j.appender.Console=org.apache.log4j.ConsoleAppender log4j.appender.Console.Target=System.out log4j.appender.Console.layout=org.apache.log4j.PatternLayout -log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.Console.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # The ParquetWriter logs for every row group which is not noisy for large row group size, # but very noisy for small row group size. 
@@ -44,7 +44,7 @@ log4j.appender.JOB_MASTER_LOGGER.File=${alluxio.logs.dir}/job_master.log log4j.appender.JOB_MASTER_LOGGER.MaxFileSize=10MB log4j.appender.JOB_MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Job Workers log4j.appender.JOB_WORKER_LOGGER=org.apache.log4j.RollingFileAppender @@ -52,7 +52,7 @@ log4j.appender.JOB_WORKER_LOGGER.File=${alluxio.logs.dir}/job_worker.log log4j.appender.JOB_WORKER_LOGGER.MaxFileSize=10MB log4j.appender.JOB_WORKER_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_WORKER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master log4j.appender.MASTER_LOGGER=org.apache.log4j.RollingFileAppender @@ -60,7 +60,7 @@ log4j.appender.MASTER_LOGGER.File=${alluxio.logs.dir}/master.log log4j.appender.MASTER_LOGGER.MaxFileSize=10MB log4j.appender.MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master log4j.appender.SECONDARY_MASTER_LOGGER=org.apache.log4j.RollingFileAppender @@ -68,7 +68,7 @@ log4j.appender.SECONDARY_MASTER_LOGGER.File=${alluxio.logs.dir}/secondary_master log4j.appender.SECONDARY_MASTER_LOGGER.MaxFileSize=10MB log4j.appender.SECONDARY_MASTER_LOGGER.MaxBackupIndex=100 log4j.appender.SECONDARY_MASTER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.SECONDARY_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n 
+log4j.appender.SECONDARY_MASTER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Master audit log4j.appender.MASTER_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -76,7 +76,7 @@ log4j.appender.MASTER_AUDIT_LOGGER.File=${alluxio.logs.dir}/master_audit.log log4j.appender.MASTER_AUDIT_LOGGER.MaxFileSize=10MB log4j.appender.MASTER_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.MASTER_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Job Master audit log4j.appender.JOB_MASTER_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -84,7 +84,7 @@ log4j.appender.JOB_MASTER_AUDIT_LOGGER.File=${alluxio.logs.dir}/job_master_audit log4j.appender.JOB_MASTER_AUDIT_LOGGER.MaxFileSize=10MB log4j.appender.JOB_MASTER_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.JOB_MASTER_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Proxy log4j.appender.PROXY_LOGGER=org.apache.log4j.RollingFileAppender @@ -92,7 +92,7 @@ log4j.appender.PROXY_LOGGER.File=${alluxio.logs.dir}/proxy.log log4j.appender.PROXY_LOGGER.MaxFileSize=10MB log4j.appender.PROXY_LOGGER.MaxBackupIndex=100 log4j.appender.PROXY_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.PROXY_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.PROXY_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Proxy audit log4j.appender.PROXY_AUDIT_LOGGER=org.apache.log4j.RollingFileAppender @@ -100,7 +100,7 @@ log4j.appender.PROXY_AUDIT_LOGGER.File=${alluxio.logs.dir}/proxy_audit.log log4j.appender.PROXY_AUDIT_LOGGER.MaxFileSize=10MB 
log4j.appender.PROXY_AUDIT_LOGGER.MaxBackupIndex=100 log4j.appender.PROXY_AUDIT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.PROXY_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M) - %m%n +log4j.appender.PROXY_AUDIT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{2}[%t](%F:%M:%L) - %m%n # Appender for Workers log4j.appender.WORKER_LOGGER=org.apache.log4j.RollingFileAppender @@ -108,7 +108,7 @@ log4j.appender.WORKER_LOGGER.File=${alluxio.logs.dir}/worker.log log4j.appender.WORKER_LOGGER.MaxFileSize=10MB log4j.appender.WORKER_LOGGER.MaxBackupIndex=100 log4j.appender.WORKER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.WORKER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Remote appender for Job Master log4j.appender.REMOTE_JOB_MASTER_LOGGER=org.apache.log4j.net.SocketAppender @@ -170,7 +170,7 @@ log4j.appender.LOGSERVER_LOGGER.File=${alluxio.logs.dir}/logserver.log log4j.appender.LOGSERVER_LOGGER.MaxFileSize=10MB log4j.appender.LOGSERVER_LOGGER.MaxBackupIndex=100 log4j.appender.LOGSERVER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.LOGSERVER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.LOGSERVER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # (Local) appender for log server to log on behalf of log clients # No need to configure file path because log server will dynamically @@ -179,7 +179,7 @@ log4j.appender.LOGSERVER_CLIENT_LOGGER=org.apache.log4j.RollingFileAppender log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxFileSize=10MB log4j.appender.LOGSERVER_CLIENT_LOGGER.MaxBackupIndex=100 log4j.appender.LOGSERVER_CLIENT_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.LOGSERVER_CLIENT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.LOGSERVER_CLIENT_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p 
[%t](%F:%L) - %m%n # Appender for User log4j.appender.USER_LOGGER=org.apache.log4j.RollingFileAppender @@ -187,7 +187,7 @@ log4j.appender.USER_LOGGER.File=${alluxio.user.logs.dir}/user_${user.name}.log log4j.appender.USER_LOGGER.MaxFileSize=10MB log4j.appender.USER_LOGGER.MaxBackupIndex=10 log4j.appender.USER_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.USER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.USER_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Appender for Fuse log4j.appender.FUSE_LOGGER=org.apache.log4j.RollingFileAppender @@ -195,7 +195,7 @@ log4j.appender.FUSE_LOGGER.File=${alluxio.logs.dir}/fuse.log log4j.appender.FUSE_LOGGER.MaxFileSize=100MB log4j.appender.FUSE_LOGGER.MaxBackupIndex=10 log4j.appender.FUSE_LOGGER.layout=org.apache.log4j.PatternLayout -log4j.appender.FUSE_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n +log4j.appender.FUSE_LOGGER.layout.ConversionPattern=%d{ISO8601} %-5p [%t](%F:%L) - %m%n # Disable noisy DEBUG logs log4j.logger.com.amazonaws.util.EC2MetadataUtils=OFF From b16a3fbc49693b306cebd4beedd675b35b6fb4b5 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Thu, 23 Mar 2023 04:56:23 +0800 Subject: [PATCH 210/334] Add UseContainerSupport flag to default value.yaml of k8s helm chart Fix #17021 pr-link: Alluxio/alluxio#17055 change-id: cid-331e62e709e1959746880e5284e85c3564bc76d4 --- .../kubernetes/helm-chart/alluxio/values.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/integration/kubernetes/helm-chart/alluxio/values.yaml b/integration/kubernetes/helm-chart/alluxio/values.yaml index c5ab77c976e4..2eadc39f0497 100644 --- a/integration/kubernetes/helm-chart/alluxio/values.yaml +++ b/integration/kubernetes/helm-chart/alluxio/values.yaml @@ -45,13 +45,15 @@ properties: # alluxio.user.metrics.collection.enabled: 'true' alluxio.security.stale.channel.purge.interval: 365d -# Recommended JVM Heap 
options for running in Docker -# Ref: https://developers.redhat.com/blog/2017/03/14/java-inside-docker/ # These JVM options are common to all Alluxio services -# jvmOptions: -# - "-XX:+UnlockExperimentalVMOptions" -# - "-XX:+UseCGroupMemoryLimitForHeap" -# - "-XX:MaxRAMFraction=2" +jvmOptions: + # https://github.com/Alluxio/alluxio/issues/17021 + - "-XX:+UseContainerSupport" + # Recommended JVM Heap options for running in Docker + # Ref: https://developers.redhat.com/blog/2017/03/14/java-inside-docker/ + # - "-XX:+UnlockExperimentalVMOptions" + # - "-XX:+UseCGroupMemoryLimitForHeap" + # - "-XX:MaxRAMFraction=2" # Mount Persistent Volumes to all components # mounts: From a95d3de77bdfeab095c6e7a50cfb2f5423f5c472 Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Thu, 23 Mar 2023 13:19:28 -0700 Subject: [PATCH 211/334] [DOCFIX] Update generated tables with docGen pr-link: Alluxio/alluxio#17133 change-id: cid-edd9b8f6eaa4b01b081879a3743097098ce2f4f8 --- docs/_data/table/common-configuration.csv | 12 ++++++++++-- docs/_data/table/en/common-configuration.yml | 18 +++++++++++++++++- docs/_data/table/en/master-configuration.yml | 6 ++++++ docs/_data/table/en/master-metrics.yml | 2 ++ docs/_data/table/en/user-configuration.yml | 2 +- docs/_data/table/master-configuration.csv | 3 +++ docs/_data/table/master-metrics.csv | 1 + docs/_data/table/user-configuration.csv | 2 +- 8 files changed, 41 insertions(+), 5 deletions(-) diff --git a/docs/_data/table/common-configuration.csv b/docs/_data/table/common-configuration.csv index 7ce38ca3f23b..c867283e17fa 100644 --- a/docs/_data/table/common-configuration.csv +++ b/docs/_data/table/common-configuration.csv @@ -1,6 +1,7 @@ propertyName,defaultValue alluxio.conf.dynamic.update.enabled,"false" alluxio.debug,"false" +alluxio.exit.collect.info,"true" alluxio.fuse.auth.policy.class,"alluxio.fuse.auth.LaunchUserGroupAuthPolicy" alluxio.fuse.auth.policy.custom.group,"" alluxio.fuse.auth.policy.custom.user,"" @@ -82,6 +83,7 @@ 
alluxio.network.host.resolution.timeout,"5sec" alluxio.network.ip.address.used,"false" alluxio.proxy.audit.logging.enabled,"false" alluxio.proxy.s3.bucket.naming.restrictions.enabled,"false" +alluxio.proxy.s3.bucketpathcache.timeout,"1min" alluxio.proxy.s3.complete.multipart.upload.keepalive.enabled,"false" alluxio.proxy.s3.complete.multipart.upload.keepalive.time.interval,"30sec" alluxio.proxy.s3.complete.multipart.upload.min.part.size,"5MB" @@ -89,15 +91,21 @@ alluxio.proxy.s3.complete.multipart.upload.pool.size,"20" alluxio.proxy.s3.deletetype,"ALLUXIO_AND_UFS" alluxio.proxy.s3.global.read.rate.limit.mb,"0" alluxio.proxy.s3.header.metadata.max.size,"2KB" -alluxio.proxy.s3.multipart.upload.cleaner.enabled,"true" +alluxio.proxy.s3.multipart.upload.cleaner.enabled,"false" alluxio.proxy.s3.multipart.upload.cleaner.pool.size,"1" alluxio.proxy.s3.multipart.upload.cleaner.retry.count,"3" alluxio.proxy.s3.multipart.upload.cleaner.retry.delay,"10sec" alluxio.proxy.s3.multipart.upload.cleaner.timeout,"10min" alluxio.proxy.s3.single.connection.read.rate.limit.mb,"0" alluxio.proxy.s3.tagging.restrictions.enabled,"true" +alluxio.proxy.s3.v2.async.heavy.pool.core.thread.number,"8" +alluxio.proxy.s3.v2.async.heavy.pool.maximum.thread.number,"64" +alluxio.proxy.s3.v2.async.heavy.pool.queue.size,"65536" +alluxio.proxy.s3.v2.async.light.pool.core.thread.number,"8" +alluxio.proxy.s3.v2.async.light.pool.maximum.thread.number,"64" +alluxio.proxy.s3.v2.async.light.pool.queue.size,"65536" alluxio.proxy.s3.v2.async.processing.enabled,"false" -alluxio.proxy.s3.v2.version.enabled,"false" +alluxio.proxy.s3.v2.version.enabled,"true" alluxio.proxy.s3.writetype,"CACHE_THROUGH" alluxio.proxy.stream.cache.timeout,"1hour" alluxio.proxy.web.bind.host,"0.0.0.0" diff --git a/docs/_data/table/en/common-configuration.yml b/docs/_data/table/en/common-configuration.yml index adac718283d0..adc535d25647 100644 --- a/docs/_data/table/en/common-configuration.yml +++ 
b/docs/_data/table/en/common-configuration.yml @@ -2,6 +2,8 @@ alluxio.conf.dynamic.update.enabled: 'Whether to support dynamic update property.' alluxio.debug: 'Set to true to enable debug mode which has additional logging and info in the Web UI.' +alluxio.exit.collect.info: + 'If true, the process will dump metrics and jstack into the log folder. This only applies to Alluxio master and worker processes.' alluxio.fuse.auth.policy.class: 'The fuse auth policy class. Valid options include: `alluxio.fuse.auth.LaunchUserGroupAuthPolicy` using the user launching the AlluxioFuse application to do authentication, `alluxio.fuse.auth.SystemUserGroupAuthPolicy` using the end-user running the fuse command to do authentication which matches POSIX standard but sacrifices performance, `alluxio.fuse.auth.CustomAuthPolicy` using the custom user group to do authentication.' alluxio.fuse.auth.policy.custom.group: @@ -164,6 +166,8 @@ alluxio.proxy.audit.logging.enabled: 'Set to true to enable proxy audit.' alluxio.proxy.s3.bucket.naming.restrictions.enabled: 'Toggles whether or not the Alluxio S3 API will enforce AWS S3 bucket naming restrictions. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html.' +alluxio.proxy.s3.bucketpathcache.timeout: + 'Expire bucket path statistics in cache for this time period. Set 0min to disable the cache.' alluxio.proxy.s3.complete.multipart.upload.keepalive.enabled: 'Whether or not to enabled sending whitespace characters as a keepalive message during CompleteMultipartUpload. Enabling this will cause any errors to be silently ignored. However, the errors will appear in the Proxy logs.' alluxio.proxy.s3.complete.multipart.upload.keepalive.time.interval: @@ -179,7 +183,7 @@ alluxio.proxy.s3.global.read.rate.limit.mb: alluxio.proxy.s3.header.metadata.max.size: 'The maximum size to allow for user-defined metadata in S3 PUTrequest headers. Set to 0 to disable size limits.' 
alluxio.proxy.s3.multipart.upload.cleaner.enabled: - 'Whether or not to enable automatic cleanup of long-running multipart uploads.' + 'Enable automatic cleanup of long-running multipart uploads.' alluxio.proxy.s3.multipart.upload.cleaner.pool.size: 'The abort multipart upload cleaner pool size.' alluxio.proxy.s3.multipart.upload.cleaner.retry.count: @@ -192,6 +196,18 @@ alluxio.proxy.s3.single.connection.read.rate.limit.mb: 'Limit the maximum read speed for each connection. Set value less than or equal to 0 to disable rate limits.' alluxio.proxy.s3.tagging.restrictions.enabled: 'Toggles whether or not the Alluxio S3 API will enforce AWS S3 tagging restrictions (10 tags, 128 character keys, 256 character values) See https://docs.aws.amazon.com/AmazonS3/latest/userguide/tagging-managing.html.' +alluxio.proxy.s3.v2.async.heavy.pool.core.thread.number: + 'Core thread number for async heavy thread pool.' +alluxio.proxy.s3.v2.async.heavy.pool.maximum.thread.number: + 'Maximum thread number for async heavy thread pool.' +alluxio.proxy.s3.v2.async.heavy.pool.queue.size: + 'Queue size for async heavy thread pool.' +alluxio.proxy.s3.v2.async.light.pool.core.thread.number: + 'Core thread number for async light thread pool.' +alluxio.proxy.s3.v2.async.light.pool.maximum.thread.number: + 'Maximum thread number for async light thread pool.' +alluxio.proxy.s3.v2.async.light.pool.queue.size: + 'Queue size for async light thread pool.' alluxio.proxy.s3.v2.async.processing.enabled: '(Experimental) If enabled, handle S3 request in async mode when v2 version of Alluxio s3 proxy service is enabled.' 
alluxio.proxy.s3.v2.version.enabled: diff --git a/docs/_data/table/en/master-configuration.yml b/docs/_data/table/en/master-configuration.yml index b0abf5619b01..0a70499defc9 100644 --- a/docs/_data/table/en/master-configuration.yml +++ b/docs/_data/table/en/master-configuration.yml @@ -80,10 +80,14 @@ alluxio.master.embedded.journal.unsafe.flush.enabled: 'If true, embedded journal entries will be committed without waiting for the entry to be flushed to disk. This may improve performance of write operations on the Alluxio master if the journal is written to a slow or contested disk. WARNING: enabling this property may result in metadata loss if half or more of the master nodes fail. See Ratis property raft.server.log.unsafe-flush.enabled at https://github.com/apache/ratis/blob/master/ratis-docs/src/site/markdown/configuraions.md.' alluxio.master.embedded.journal.write.timeout: 'Maximum time to wait for a write/flush on embedded journal.' +alluxio.master.failover.collect.info: + 'If true, the primary master will persist metrics and jstack into the log folder when it transitions to standby. ' alluxio.master.file.access.time.journal.flush.interval: 'The minimum interval between files access time update journal entries get flushed asynchronously. Setting it to a non-positive value will make the the journal update synchronous. Asynchronous update reduces the performance impact of tracking access time but can lose some access time update when master stops unexpectedly.' alluxio.master.file.access.time.update.precision: 'The file last access time is precise up to this value. Setting it toa non-positive value will update last access time on every file access operation.Longer precision will help reduce the performance impact of tracking access time by reduce the amount of metadata writes occur while reading the same group of files repetitively.' 
+alluxio.master.file.access.time.updater.enabled: + 'If enabled, file access time updater will update the file last access time when an inode is accessed. This property can be turned off to improve performance and reduce the number of journal entries if your application does not rely on the file access time metadata.' alluxio.master.file.access.time.updater.shutdown.timeout: 'Maximum time to wait for access updater to stop on shutdown.' alluxio.master.filesystem.liststatus.result.message.length: @@ -346,6 +350,8 @@ alluxio.master.standby.heartbeat.interval: 'The heartbeat interval between Alluxio primary master and standby masters.' alluxio.master.startup.block.integrity.check.enabled: 'Whether the system should be checked on startup for orphaned blocks (blocks having no corresponding files but still taking system resource due to various system failures). Orphaned blocks will be deleted during master startup if this property is true. This property is available since 1.7.1' +alluxio.master.state.lock.error.threshold: + 'Used to trace and debug state lock issues. When a thread recursively acquires the state lock more than threshold, log an error for further debugging.' 
alluxio.master.throttle.active.cpu.load.ratio: 'N/A' alluxio.master.throttle.active.heap.gc.time: diff --git a/docs/_data/table/en/master-metrics.yml b/docs/_data/table/en/master-metrics.yml index 3ab1a4aa6ded..fed9d6810da1 100644 --- a/docs/_data/table/en/master-metrics.yml +++ b/docs/_data/table/en/master-metrics.yml @@ -22,6 +22,8 @@ Master.BlockHeapSize: 'An estimate of the blocks heap size' Master.BlockReplicaCount: 'Total number of block replicas in Alluxio' +Master.CachedBlockLocations: + 'Total number of cached block locations' Master.CompleteFileOps: 'Total number of the CompleteFile operations' Master.CompletedOperationRetryCount: diff --git a/docs/_data/table/en/user-configuration.yml b/docs/_data/table/en/user-configuration.yml index 61efaf3f1eb7..3ec7f66970af 100644 --- a/docs/_data/table/en/user-configuration.yml +++ b/docs/_data/table/en/user-configuration.yml @@ -109,7 +109,7 @@ alluxio.user.file.copyfromlocal.block.location.policy.class: alluxio.user.file.create.ttl: 'Time to live for files created by a user, no ttl by default.' alluxio.user.file.create.ttl.action: - 'When file''s ttl is expired, the action performs on it. Options: DELETE_ALLUXIO(default), FREE or DELETE' + 'When file''s ttl is expired, the action performs on it. Options: FREE(default), DELETE_ALLUXIO or DELETE' alluxio.user.file.delete.unchecked: 'Whether to check if the UFS contents are in sync with Alluxio before attempting to delete persisted directories recursively.' 
alluxio.user.file.include.operation.id: diff --git a/docs/_data/table/master-configuration.csv b/docs/_data/table/master-configuration.csv index a4d5ea6bef82..c0f0c81f8648 100644 --- a/docs/_data/table/master-configuration.csv +++ b/docs/_data/table/master-configuration.csv @@ -40,8 +40,10 @@ alluxio.master.embedded.journal.transport.max.inbound.message.size,"100MB" alluxio.master.embedded.journal.transport.request.timeout.ms,"5sec" alluxio.master.embedded.journal.unsafe.flush.enabled,"false" alluxio.master.embedded.journal.write.timeout,"30sec" +alluxio.master.failover.collect.info,"true" alluxio.master.file.access.time.journal.flush.interval,"1h" alluxio.master.file.access.time.update.precision,"1d" +alluxio.master.file.access.time.updater.enabled,"true" alluxio.master.file.access.time.updater.shutdown.timeout,"1sec" alluxio.master.filesystem.liststatus.result.message.length,"10000" alluxio.master.filesystem.merge.inode.journals,"true" @@ -173,6 +175,7 @@ alluxio.master.shell.backup.state.lock.timeout,"0s" alluxio.master.shell.backup.state.lock.try.duration,"0s" alluxio.master.standby.heartbeat.interval,"2min" alluxio.master.startup.block.integrity.check.enabled,"false" +alluxio.master.state.lock.error.threshold,"20" alluxio.master.throttle.active.cpu.load.ratio,"0.5" alluxio.master.throttle.active.heap.gc.time,"1sec" alluxio.master.throttle.active.heap.used.ratio,"0.5" diff --git a/docs/_data/table/master-metrics.csv b/docs/_data/table/master-metrics.csv index adb6a3876f55..4374cce3f9ca 100644 --- a/docs/_data/table/master-metrics.csv +++ b/docs/_data/table/master-metrics.csv @@ -11,6 +11,7 @@ Master.AsyncPersistSuccess,COUNTER Master.AuditLogEntriesSize,GAUGE Master.BlockHeapSize,GAUGE Master.BlockReplicaCount,GAUGE +Master.CachedBlockLocations,GAUGE Master.CompleteFileOps,COUNTER Master.CompletedOperationRetryCount,COUNTER Master.CreateDirectoryOps,COUNTER diff --git a/docs/_data/table/user-configuration.csv b/docs/_data/table/user-configuration.csv index 
1c407ed1ddf1..9d53391d6115 100644 --- a/docs/_data/table/user-configuration.csv +++ b/docs/_data/table/user-configuration.csv @@ -54,7 +54,7 @@ alluxio.user.date.format.pattern,"MM-dd-yyyy HH:mm:ss:SSS" alluxio.user.file.buffer.bytes,"8MB" alluxio.user.file.copyfromlocal.block.location.policy.class,"alluxio.client.block.policy.RoundRobinPolicy" alluxio.user.file.create.ttl,"-1" -alluxio.user.file.create.ttl.action,"DELETE_ALLUXIO" +alluxio.user.file.create.ttl.action,"FREE" alluxio.user.file.delete.unchecked,"false" alluxio.user.file.include.operation.id,"true" alluxio.user.file.master.client.pool.gc.interval,"120sec" From 88c88b72430c01277e5890b35d02978bce6f6b8b Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 29 Mar 2023 20:13:11 +0800 Subject: [PATCH 212/334] Fix stress create tree file number ### What changes are proposed in this pull request? Fix one miss used variable, create tree correctly now ### Why are the changes needed? Previously create tree run incorrectly ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17136 change-id: cid-ade1ce2b614a7a0596e2de2404f94471f789496f --- .../alluxio/stress/cli/StressMasterBench.java | 3 +- .../stress/cli/StressMasterBenchBase.java | 45 +++++++++++-------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java index b203f4cb9935..98da05d34e73 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBench.java @@ -272,7 +272,8 @@ private void runInternal() throws IOException, AlluxioException { break; } localCounter = mContext.getOperationCounter(0).getAndIncrement(); - if (mParameters.mOperation == Operation.CREATE_TREE && localCounter >= mTreeTotalCount) { + if (mParameters.mOperation == Operation.CREATE_TREE + && localCounter >= mTreeTotalNodeCount) { break; } if (mParameters.mOperation == Operation.LOAD_METADATA diff --git a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java index 9eb0f711255e..93d5699219dd 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java @@ -16,6 +16,7 @@ import alluxio.client.file.FileOutStream; import alluxio.client.file.URIStatus; import alluxio.exception.AlluxioException; +import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.UnexpectedAlluxioException; import alluxio.grpc.Bits; import alluxio.grpc.CreateDirectoryPOptions; @@ -79,9 +80,9 @@ public abstract class StressMasterBenchBase protected final String mFixedDir = "fixed"; // vars for createTestTree - protected int[] mPathRecord; - protected int[] mTreeLevelQuant; - protected int mTreeTotalCount; + protected int[] mPathNodeIds; + protected int[] 
mTreeLevelNodeCount; + protected int mTreeTotalNodeCount; /** * Creates instance. @@ -242,13 +243,14 @@ protected final class BenchContext { mBasePaths = new Path[operations.length]; mFixedBasePaths = new Path[operations.length]; - mPathRecord = new int[mParameters.mTreeDepth]; - mTreeLevelQuant = new int[mParameters.mTreeDepth]; - mTreeLevelQuant[mParameters.mTreeDepth - 1] = mParameters.mTreeWidth; - for (int i = mTreeLevelQuant.length - 2; i >= 0; i--) { - mTreeLevelQuant[i] = mTreeLevelQuant[i + 1] * mParameters.mTreeWidth; + mPathNodeIds = new int[mParameters.mTreeDepth]; + mTreeLevelNodeCount = new int[mParameters.mTreeDepth]; + mTreeLevelNodeCount[mParameters.mTreeDepth - 1] = mParameters.mTreeWidth; + for (int levelCount = mTreeLevelNodeCount.length - 2; levelCount >= 0; levelCount--) { + mTreeLevelNodeCount[levelCount] = + mTreeLevelNodeCount[levelCount + 1] * mParameters.mTreeWidth; } - mTreeTotalCount = mTreeLevelQuant[0] * mParameters.mTreeThreads; + mTreeTotalNodeCount = mTreeLevelNodeCount[0] * mParameters.mTreeThreads; for (int i = 0; i < operations.length; i++) { mOperationCounters[i] = new AtomicLong(); @@ -486,17 +488,22 @@ protected void applyNativeOperation( .setLoadMetadataOnly(true).build()); break; case CREATE_TREE: - String p = ""; - int redundent = (int) counter; - for (int i = 0; i < mParameters.mTreeWidth; i++) { - mPathRecord[i] = redundent / mTreeLevelQuant[i]; - redundent = redundent % mTreeLevelQuant[i]; - p += "/"; - p += mPathRecord[i]; + String nodePath = ""; + int nodeNumber = (int) counter; + for (int levelCount = 0; levelCount < mParameters.mTreeDepth; levelCount++) { + mPathNodeIds[levelCount] = nodeNumber / mTreeLevelNodeCount[levelCount]; + nodeNumber = nodeNumber % mTreeLevelNodeCount[levelCount]; + nodePath += "/"; + nodePath += mPathNodeIds[levelCount]; } - for (int i = 0; i < mParameters.mTreeFiles; i++) { - fs.createFile(new AlluxioURI((basePath + p + "/" + redundent + "/" + i + ".txt")), - 
CreateFilePOptions.newBuilder().setRecursive(true).build()).close(); + for (int fileNumber = 0; fileNumber < mParameters.mTreeFiles; fileNumber++) { + try { + fs.createFile(new AlluxioURI((basePath + nodePath + "/" + + nodeNumber + "/" + fileNumber + ".txt")), + CreateFilePOptions.newBuilder().setRecursive(true).build()).close(); + } catch (FileAlreadyExistsException e) { + break; + } } break; default: From fee877946f3ec59ca2f0e98dab308f7177484d40 Mon Sep 17 00:00:00 2001 From: voddle Date: Wed, 29 Mar 2023 20:14:07 +0800 Subject: [PATCH 213/334] [SMALLFIX] Improve stressbench result variable name ### What changes are proposed in this pull request? Renamed `mNumSuccess` to `mNumSuccesses` ### Why are the changes needed? To match overmind `stressbench/results.go` pr-link: Alluxio/alluxio#17153 change-id: cid-87f68848ad3510b4e0093f17cebb4bb5fef3354e --- .../main/java/alluxio/stress/client/ClientIOTaskResult.java | 2 +- .../java/alluxio/stress/client/CompactionTaskResult.java | 4 ++-- .../main/java/alluxio/stress/common/SummaryStatistics.java | 6 +++--- .../java/alluxio/stress/common/TaskResultStatistics.java | 6 +++--- .../alluxio/stress/jobservice/JobServiceBenchSummary.java | 4 ++-- .../stress/jobservice/JobServiceBenchTaskResult.java | 2 +- .../main/java/alluxio/stress/master/MasterBenchSummary.java | 4 ++-- .../java/alluxio/stress/master/MasterBenchTaskResult.java | 2 +- .../stress/master/MultiOperationMasterBenchSummary.java | 2 +- .../stress/master/MultiOperationMasterBenchTaskResult.java | 4 ++-- .../src/main/java/alluxio/stress/cli/MaxFileBench.java | 2 +- .../main/java/alluxio/stress/cli/StressJobServiceBench.java | 2 +- .../main/java/alluxio/stress/cli/StressMasterBenchBase.java | 2 +- .../alluxio/stress/StressMasterBenchIntegrationTest.java | 2 +- 14 files changed, 22 insertions(+), 22 deletions(-) diff --git a/stress/common/src/main/java/alluxio/stress/client/ClientIOTaskResult.java 
b/stress/common/src/main/java/alluxio/stress/client/ClientIOTaskResult.java index c0361b16b4f5..d2ce1f71e77d 100644 --- a/stress/common/src/main/java/alluxio/stress/client/ClientIOTaskResult.java +++ b/stress/common/src/main/java/alluxio/stress/client/ClientIOTaskResult.java @@ -178,7 +178,7 @@ private void getNumSuccessData(String series, LineGraph lineGraph) { threadEntry.getValue().entrySet()) { String prefix = series + ", method: " + methodEntry.getKey(); LineGraph.Data currentData = data.getOrDefault(prefix, new LineGraph.Data()); - currentData.addData(threadEntry.getKey(), methodEntry.getValue().mNumSuccess); + currentData.addData(threadEntry.getKey(), methodEntry.getValue().mNumSuccesses); data.put(prefix, currentData); } } diff --git a/stress/common/src/main/java/alluxio/stress/client/CompactionTaskResult.java b/stress/common/src/main/java/alluxio/stress/client/CompactionTaskResult.java index c09eb3837f01..fff4d673b11d 100644 --- a/stress/common/src/main/java/alluxio/stress/client/CompactionTaskResult.java +++ b/stress/common/src/main/java/alluxio/stress/client/CompactionTaskResult.java @@ -114,7 +114,7 @@ public void merge(CompactionTaskResult toMerge) throws Exception { * Increase number of successes by 1. 
*/ public void incrementNumSuccess() { - mStatistics.mNumSuccess += 1; + mStatistics.mNumSuccesses += 1; } /** @@ -174,7 +174,7 @@ public static class CompactionSummary implements Summary { public CompactionSummary(CompactionTaskResult mergedResult) throws DataFormatException { mBaseParameters = mergedResult.getBaseParameters(); mParameters = mergedResult.getParameters(); - mNumSuccess = mergedResult.getStatistics().mNumSuccess; + mNumSuccess = mergedResult.getStatistics().mNumSuccesses; mErrors = mergedResult.getErrors(); mSummaryStatistics = mergedResult.getStatistics().toBenchSummaryStatistics(); } diff --git a/stress/common/src/main/java/alluxio/stress/common/SummaryStatistics.java b/stress/common/src/main/java/alluxio/stress/common/SummaryStatistics.java index 3f3942514c42..0b48c01ea2f7 100644 --- a/stress/common/src/main/java/alluxio/stress/common/SummaryStatistics.java +++ b/stress/common/src/main/java/alluxio/stress/common/SummaryStatistics.java @@ -23,7 +23,7 @@ */ public class SummaryStatistics { /** number of successes. */ - public long mNumSuccess; + public long mNumSuccesses; /** response times for all percentiles from 0 -> 100 (101 values). 
*/ @SuppressFBWarnings(value = "EI_EXPOSE_REP2") @@ -59,7 +59,7 @@ public SummaryStatistics() { public SummaryStatistics(long numSuccess, float[] timePercentileMs, float[] time99PercentileMs, float[] maxTimeMs) { - mNumSuccess = numSuccess; + mNumSuccesses = numSuccess; mTimePercentileMs = timePercentileMs; mTime99PercentileMs = time99PercentileMs; mMaxTimeMs = maxTimeMs; @@ -71,7 +71,7 @@ public SummaryStatistics(long numSuccess, float[] timePercentileMs, public LineGraph.Data computeTimeData() { LineGraph.Data data = new LineGraph.Data(); - if (mNumSuccess == 0) { + if (mNumSuccesses == 0) { // Return empty data for empty results return data; } diff --git a/stress/common/src/main/java/alluxio/stress/common/TaskResultStatistics.java b/stress/common/src/main/java/alluxio/stress/common/TaskResultStatistics.java index 0f4896d72b3f..8be220d5bdf8 100644 --- a/stress/common/src/main/java/alluxio/stress/common/TaskResultStatistics.java +++ b/stress/common/src/main/java/alluxio/stress/common/TaskResultStatistics.java @@ -26,7 +26,7 @@ * abstract class that calculate statistics for{@link TaskResult}. 
*/ public class TaskResultStatistics { - public long mNumSuccess; + public long mNumSuccesses; @SuppressFBWarnings(value = "EI_EXPOSE_REP2") public byte[] mResponseTimeNsRaw; @SuppressFBWarnings(value = "EI_EXPOSE_REP2") @@ -42,7 +42,7 @@ protected TaskResultStatistics() { * @param statistics the task result statistics to merge */ public void merge(TaskResultStatistics statistics) throws Exception { - mNumSuccess += statistics.mNumSuccess; + mNumSuccesses += statistics.mNumSuccesses; Histogram responseTime = new Histogram(StressConstants.TIME_HISTOGRAM_MAX, StressConstants.TIME_HISTOGRAM_PRECISION); @@ -107,7 +107,7 @@ public SummaryStatistics toBenchSummaryStatistics() throws DataFormatException { maxResponseTimesMs[i] = (float) mMaxResponseTimeNs[i] / Constants.MS_NANO; } - return new SummaryStatistics(mNumSuccess, responseTimePercentile, responseTime99Percentile, + return new SummaryStatistics(mNumSuccesses, responseTimePercentile, responseTime99Percentile, maxResponseTimesMs); } } diff --git a/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchSummary.java b/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchSummary.java index 09722ff7b017..a6df02ee0902 100644 --- a/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchSummary.java +++ b/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchSummary.java @@ -66,7 +66,7 @@ public JobServiceBenchSummary(JobServiceBenchTaskResult mergedTaskResults, mEndTimeMs = mergedTaskResults.getEndMs(); mParameters = mergedTaskResults.getParameters(); mDurationMs = mEndTimeMs - mergedTaskResults.getRecordStartMs(); - mThroughput = ((float) mStatistics.mNumSuccess / mDurationMs) * 1000.0f; + mThroughput = ((float) mStatistics.mNumSuccesses / mDurationMs) * 1000.0f; mNodeResults = nodes; } @@ -203,7 +203,7 @@ public List generate(List results) { responseTimeGraphPerMethod.get(method).addDataSeries(series, responseTimeData); // collect max success for each method 
methodCounts.put(method, - Math.max(methodCounts.getOrDefault(method, 0L), entry.getValue().mNumSuccess)); + Math.max(methodCounts.getOrDefault(method, 0L), entry.getValue().mNumSuccesses)); } } diff --git a/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchTaskResult.java b/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchTaskResult.java index 907d273ba740..f792aecbe1c1 100644 --- a/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchTaskResult.java +++ b/stress/common/src/main/java/alluxio/stress/jobservice/JobServiceBenchTaskResult.java @@ -84,7 +84,7 @@ public void aggregateByWorker(JobServiceBenchTaskResult result) throws Exception * @param numSuccess the amount to increment by */ public void incrementNumSuccess(long numSuccess) { - mStatistics.mNumSuccess += numSuccess; + mStatistics.mNumSuccesses += numSuccess; } @Override diff --git a/stress/common/src/main/java/alluxio/stress/master/MasterBenchSummary.java b/stress/common/src/main/java/alluxio/stress/master/MasterBenchSummary.java index 6a46603a85e8..f371698dbf80 100644 --- a/stress/common/src/main/java/alluxio/stress/master/MasterBenchSummary.java +++ b/stress/common/src/main/java/alluxio/stress/master/MasterBenchSummary.java @@ -72,7 +72,7 @@ public MasterBenchSummary(MasterBenchTaskResult mergedTaskResults, mDurationMs = mergedTaskResults.getEndMs() - mergedTaskResults.getRecordStartMs(); mEndTimeMs = mergedTaskResults.getEndMs(); - mThroughput = ((float) mStatistics.mNumSuccess / mDurationMs) * 1000.0f; + mThroughput = ((float) mStatistics.mNumSuccesses / mDurationMs) * 1000.0f; mParameters = mergedTaskResults.getParameters(); mNodeResults = nodes; } @@ -221,7 +221,7 @@ public List generate(List results) { // collect max success for each method methodCounts.put(method, - Math.max(methodCounts.getOrDefault(method, 0L), entry.getValue().mNumSuccess)); + Math.max(methodCounts.getOrDefault(method, 0L), entry.getValue().mNumSuccesses)); } } diff --git 
a/stress/common/src/main/java/alluxio/stress/master/MasterBenchTaskResult.java b/stress/common/src/main/java/alluxio/stress/master/MasterBenchTaskResult.java index 151f3bc4a24a..8f4ceed9d1d9 100644 --- a/stress/common/src/main/java/alluxio/stress/master/MasterBenchTaskResult.java +++ b/stress/common/src/main/java/alluxio/stress/master/MasterBenchTaskResult.java @@ -61,7 +61,7 @@ void mergeResultStatistics(MasterBenchTaskResultBase resu * @param numSuccess the amount to increment by */ public void incrementNumSuccess(long numSuccess) { - mStatistics.mNumSuccess += numSuccess; + mStatistics.mNumSuccesses += numSuccess; } /** diff --git a/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchSummary.java b/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchSummary.java index 8c1b36a77bfb..9aee895fc74e 100644 --- a/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchSummary.java +++ b/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchSummary.java @@ -76,7 +76,7 @@ public MultiOperationMasterBenchSummary( mThroughput = ((float) mergedTaskResults.getNumSuccessOperations() / mDurationMs) * 1000.0f; mOperationThroughputs = new ArrayList<>(); for (MasterBenchTaskResultStatistics statistics : mergedTaskResults.getAllStatistics()) { - float throughput = ((float) statistics.mNumSuccess / mDurationMs) * 1000.0f; + float throughput = ((float) statistics.mNumSuccesses / mDurationMs) * 1000.0f; mOperationThroughputs.add(new Pair<>(statistics.mOperation, throughput)); } mParameters = mergedTaskResults.getParameters(); diff --git a/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchTaskResult.java b/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchTaskResult.java index b00d4e101da0..ec6e20dcf3dd 100644 --- a/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchTaskResult.java +++ 
b/stress/common/src/main/java/alluxio/stress/master/MultiOperationMasterBenchTaskResult.java @@ -82,7 +82,7 @@ void mergeResultStatistics(MasterBenchTaskResultBase entry : nameStatistics.entrySet()) { final JobServiceBenchTaskResultStatistics stats = new JobServiceBenchTaskResultStatistics(); stats.encodeResponseTimeNsRaw(entry.getValue().getTimeNs()); - stats.mNumSuccess = entry.getValue().getNumSuccess(); + stats.mNumSuccesses = entry.getValue().getNumSuccess(); stats.mMaxResponseTimeNs = entry.getValue().getMaxTimeNs(); mResult.putStatisticsForMethod(entry.getKey(), stats); } diff --git a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java index 93d5699219dd..9c734963b193 100644 --- a/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java +++ b/stress/shell/src/main/java/alluxio/stress/cli/StressMasterBenchBase.java @@ -353,7 +353,7 @@ public synchronized void addAdditionalResult() throws IOException { for (Map.Entry entry : nameStatistics.entrySet()) { final MasterBenchTaskResultStatistics stats = new MasterBenchTaskResultStatistics(); stats.encodeResponseTimeNsRaw(entry.getValue().getTimeNs()); - stats.mNumSuccess = entry.getValue().getNumSuccess(); + stats.mNumSuccesses = entry.getValue().getNumSuccess(); stats.mMaxResponseTimeNs = entry.getValue().getMaxTimeNs(); mResult.putStatisticsForMethod(entry.getKey(), stats); } diff --git a/tests/src/test/java/alluxio/stress/StressMasterBenchIntegrationTest.java b/tests/src/test/java/alluxio/stress/StressMasterBenchIntegrationTest.java index 8f6965fd9b22..ff795bf6bc1d 100644 --- a/tests/src/test/java/alluxio/stress/StressMasterBenchIntegrationTest.java +++ b/tests/src/test/java/alluxio/stress/StressMasterBenchIntegrationTest.java @@ -285,7 +285,7 @@ private void validateTheOutput(String operation) throws Exception { assertTrue(summary.getNodeResults().size() >= 1); assertTrue(summary.getDurationMs() > 0); 
assertTrue(summary.getThroughput() > 0); - assertEquals(summary.getStatistics().mNumSuccess, 100); + assertEquals(summary.getStatistics().mNumSuccesses, 100); assertTrue(summary.collectErrorsFromAllNodes().isEmpty()); } } From e3955bd494218ffbf70c6c4354d1a47a4617fa38 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Thu, 30 Mar 2023 10:12:40 +0800 Subject: [PATCH 214/334] Remove duplicate word in comments Fixing a typo by removing duplicate word in comments. pr-link: Alluxio/alluxio#17161 change-id: cid-285cfd74f30549f76730484c99b00e4e394aa677 --- .../src/main/java/alluxio/master/job/plan/PlanCoordinator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/job/server/src/main/java/alluxio/master/job/plan/PlanCoordinator.java b/job/server/src/main/java/alluxio/master/job/plan/PlanCoordinator.java index bea3461e18cf..4fa2a50cb57a 100644 --- a/job/server/src/main/java/alluxio/master/job/plan/PlanCoordinator.java +++ b/job/server/src/main/java/alluxio/master/job/plan/PlanCoordinator.java @@ -58,7 +58,7 @@ public final class PlanCoordinator { private final PlanInfo mPlanInfo; private final CommandManager mCommandManager; - /** The context containing containing the necessary references to schedule jobs. */ + /** The context containing the necessary references to schedule jobs. */ private final JobServerContext mJobServerContext; /** From 50c4c29279c53225d33b02d077964a430314ac19 Mon Sep 17 00:00:00 2001 From: bingzheng Date: Thu, 30 Mar 2023 13:32:44 +0800 Subject: [PATCH 215/334] Add worker decommission step 1 ### What changes are proposed in this pull request? This pr is related to #13758 This change achieves below functionalities: 1. Provide a client shell command to kick out workers directly. 2. Decommissioned worker nodes are not automatically shutdown and are not chosen for new IO requests 3. Decommissioned workers are not accepted on heartbeat or register Note there's a delay in when clients know the workers have been decommissioned. 
In the window, target worker will still receive requests. There's also no way for the worker to come back to the cluster. This command should only be used when this worker is going to leave the cluster (and never come back). ### Why are the changes needed? Offline workers more flexible, no need to wait master detection lost worker interval. For instance, We may encounter situations like we want to remove worker nodes, or we want to offline some worker nodes to save costs, this command is useful. ### Does this PR introduce any user facing changes? Yes pr-link: Alluxio/alluxio#14000 change-id: cid-822a1cad1b23cd381b907705c1b92bd6ac3f16d0 --- .../client/block/BlockMasterClient.java | 9 ++ .../block/RetryHandlingBlockMasterClient.java | 8 ++ .../alluxio/master/block/BlockMaster.java | 10 +- .../BlockMasterClientServiceHandler.java | 11 ++ .../master/block/DefaultBlockMaster.java | 68 +++++++++-- .../master/block/meta/MasterWorkerInfo.java | 12 +- .../master/block/meta/WorkerState.java | 31 +++++ .../alluxio/master/block/BlockMasterTest.java | 22 ++++ .../block/meta/MasterWorkerInfoTest.java | 2 +- .../src/main/proto/grpc/block_master.proto | 10 ++ core/transport/src/main/proto/proto.lock | 18 +++ .../fs/command/DecommissionWorkerCommand.java | 112 ++++++++++++++++++ 12 files changed, 290 insertions(+), 23 deletions(-) create mode 100644 core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java create mode 100644 shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java index bcc5864d71bc..5051c2d2c0db 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java @@ -13,6 +13,8 @@ import alluxio.Client; import alluxio.client.block.options.GetWorkerReportOptions; +import 
alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; import alluxio.wire.BlockInfo; @@ -104,4 +106,11 @@ List getWorkerReport(final GetWorkerReportOptions options) * @return amount of used space in bytes */ long getUsedBytes() throws IOException; + + /** + * Decommission a worker. + * @param options method options + * @throws AlluxioStatusException if something goes wrong + */ + void decommissionWorker(DecommissionWorkerPOptions options) throws IOException; } diff --git a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java index e37da23a7a6f..55b1f53dab47 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java @@ -15,6 +15,7 @@ import alluxio.Constants; import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.grpc.BlockMasterClientServiceGrpc; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetBlockInfoPRequest; import alluxio.grpc.GetBlockMasterInfoPOptions; import alluxio.grpc.GetCapacityBytesPOptions; @@ -177,4 +178,11 @@ public long getUsedBytes() throws IOException { () -> mClient.getUsedBytes(GetUsedBytesPOptions.getDefaultInstance()).getBytes(), RPC_LOG, "GetUsedBytes", ""); } + + @Override + public void decommissionWorker(DecommissionWorkerPOptions options) throws IOException { + retryRPC(() -> mClient.decommissionWorker(options), + RPC_LOG, "DecommissionWorker", "workerName=%s,options=%s", + options.getWorkerName(), options); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index ce36c111427c..3d29e9cd7f8e 100644 --- 
a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -122,12 +122,18 @@ List getWorkerReport(GetWorkerReportOptions options) */ List getWorkerLostStorage(); + /** + * @param workerId the worker id + * @return true if the worker is excluded, otherwise false + */ + boolean isNotServing(long workerId); + /** * Decommission a worker. * - * @param workerId the WorkerInfo of worker to be decommissioned + * @param workerName the worker hostname of worker to be decommissioned */ - void decommissionWorker(long workerId) throws Exception; + void decommissionWorker(String workerName) throws NotFoundException; /** * Removes blocks from workers. diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java index f6bd7c3aaa7d..b2088bc56653 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java @@ -17,6 +17,8 @@ import alluxio.grpc.BlockMasterClientServiceGrpc; import alluxio.grpc.BlockMasterInfo; import alluxio.grpc.BlockMasterInfoField; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.DecommissionWorkerPResponse; import alluxio.grpc.GetBlockInfoPOptions; import alluxio.grpc.GetBlockInfoPRequest; import alluxio.grpc.GetBlockInfoPResponse; @@ -184,4 +186,13 @@ public void getWorkerLostStorage(GetWorkerLostStoragePOptions options, .addAllWorkerLostStorageInfo(mBlockMaster.getWorkerLostStorage()).build(), "GetWorkerLostStorage", "options=%s", responseObserver, options); } + + @Override + public void decommissionWorker(DecommissionWorkerPOptions request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + mBlockMaster.decommissionWorker(request.getWorkerName()); + return 
DecommissionWorkerPResponse.getDefaultInstance(); + }, "DecommissionWorker", "request=%s", responseObserver, request); + } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 0168f6449456..a852e43e300a 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -48,6 +48,7 @@ import alluxio.master.CoreMasterContext; import alluxio.master.block.meta.MasterWorkerInfo; import alluxio.master.block.meta.WorkerMetaLockSection; +import alluxio.master.block.meta.WorkerState; import alluxio.master.journal.JournalContext; import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.metastore.BlockMetaStore; @@ -628,7 +629,7 @@ private List constructWorkerInfoList() { for (MasterWorkerInfo worker : mWorkers) { // extractWorkerInfo handles the locking internally workerInfoList.add(extractWorkerInfo(worker, - GetWorkerReportOptions.WorkerInfoField.ALL, true)); + GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LIVE)); } return workerInfoList; } @@ -642,7 +643,7 @@ public List getLostWorkersInfoList() throws UnavailableException { for (MasterWorkerInfo worker : mLostWorkers) { // extractWorkerInfo handles the locking internally workerInfoList.add(extractWorkerInfo(worker, - GetWorkerReportOptions.WorkerInfoField.ALL, false)); + GetWorkerReportOptions.WorkerInfoField.ALL, WorkerState.LOST)); } workerInfoList.sort(new WorkerInfo.LastContactSecComparator()); return workerInfoList; @@ -724,15 +725,15 @@ public List getWorkerReport(GetWorkerReportOptions options) + selectedDecommissionedWorkers.size()); for (MasterWorkerInfo worker : selectedLiveWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), true)); + 
workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LIVE)); } for (MasterWorkerInfo worker : selectedLostWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), false)); + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); } for (MasterWorkerInfo worker : selectedDecommissionedWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), false)); + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); } return workerInfoList; } @@ -741,9 +742,9 @@ public List getWorkerReport(GetWorkerReportOptions options) * Locks the {@link MasterWorkerInfo} properly and convert it to a {@link WorkerInfo}. */ private WorkerInfo extractWorkerInfo(MasterWorkerInfo worker, - Set fieldRange, boolean isLiveWorker) { + Set fieldRange, WorkerState workerState) { try (LockResource r = worker.lockWorkerMetaForInfo(fieldRange)) { - return worker.generateWorkerInfo(fieldRange, isLiveWorker); + return worker.generateWorkerInfo(fieldRange, workerState); } } @@ -815,9 +816,25 @@ public void removeBlocks(Collection blockIds, boolean delete) throws Unava } @Override - public void decommissionWorker(long workerId) - throws Exception { - //TODO(Tony Sun): added in another pr. 
+ public boolean isNotServing(long workerId) { + return mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId) != null; + } + + @Override + public void decommissionWorker(String workerHostName) + throws NotFoundException { + for (MasterWorkerInfo workerInfo : mWorkers) { + if (workerHostName.equals(workerInfo.getWorkerAddress().getHost())) { + try (LockResource r = workerInfo.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + processDecommissionedWorker(workerInfo); + } + LOG.info("{} has been added to the decommissionedWorkers set.", + workerHostName); + return; + } + } + throw new NotFoundException("Worker {} not found in alive worker set"); } @Override @@ -942,7 +959,6 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, throws NotFoundException, UnavailableException { LOG.debug("Commit block from workerId: {}, usedBytesOnTier: {}, blockId: {}, length: {}", workerId, usedBytesOnTier, blockId, length); - MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); // TODO(peis): Check lost workers as well. if (worker == null) { @@ -1201,6 +1217,10 @@ public void workerRegister(long workerId, List storageTiers, Map lostStorage, RegisterWorkerPOptions options) throws NotFoundException { + if (isNotServing(workerId)) { + return; + } + MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { @@ -1267,9 +1287,32 @@ public MasterWorkerInfo getWorker(long workerId) throws NotFoundException { return worker; } + private void processDecommissionedWorkerBlocks(MasterWorkerInfo workerInfo) { + processWorkerRemovedBlocks(workerInfo, workerInfo.getBlocks(), false); + } + + /** + * Updates the metadata for the specified decommissioned worker. 
+ * @param worker the master worker info + */ + private void processDecommissionedWorker(MasterWorkerInfo worker) { + mDecommissionedWorkers.add(worker); + mWorkers.remove(worker); + WorkerNetAddress workerNetAddress = worker.getWorkerAddress(); + // TODO(bzheng888): Maybe need a new listener such as WorkerDecommissionListener. + for (Consumer

function : mWorkerLostListeners) { + function.accept(new Address(workerNetAddress.getHost(), workerNetAddress.getRpcPort())); + } + processDecommissionedWorkerBlocks(worker); + } + @Override public void workerRegisterStream(WorkerRegisterContext context, RegisterWorkerPRequest chunk, boolean isFirstMsg) { + if (isNotServing(context.getWorkerId())) { + // Stop register the excluded worker + return; + } // TODO(jiacheng): find a place to check the lease if (isFirstMsg) { workerRegisterStart(context, chunk); @@ -1378,6 +1421,9 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT Map> addedBlocks, Map lostStorage, List metrics) { + if (isNotServing(workerId)) { + return Command.newBuilder().setCommandType(CommandType.Nothing).build(); + } MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { LOG.warn("Could not find worker id: {} for heartbeat.", workerId); diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java index b8bde41ddd38..8974ce548176 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java +++ b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java @@ -116,8 +116,6 @@ @NotThreadSafe public final class MasterWorkerInfo { private static final Logger LOG = LoggerFactory.getLogger(MasterWorkerInfo.class); - private static final String LIVE_WORKER_STATE = "In Service"; - private static final String LOST_WORKER_STATE = "Out of Service"; private static final EnumSet USAGE_INFO_FIELDS = EnumSet.of(WorkerInfoField.WORKER_CAPACITY_BYTES, @@ -301,10 +299,10 @@ public void addLostStorage(Map lostStorage) { * The required locks will be determined internally based on the fields. 
* * @param fieldRange the client selected fields - * @param isLiveWorker the worker is live or not + * @param workerState the worker state * @return generated worker information */ - public WorkerInfo generateWorkerInfo(Set fieldRange, boolean isLiveWorker) { + public WorkerInfo generateWorkerInfo(Set fieldRange, WorkerState workerState) { WorkerInfo info = new WorkerInfo(); for (WorkerInfoField field : fieldRange) { switch (field) { @@ -331,11 +329,7 @@ public WorkerInfo generateWorkerInfo(Set fieldRange, boolean is info.setStartTimeMs(mMeta.mStartTimeMs); break; case STATE: - if (isLiveWorker) { - info.setState(LIVE_WORKER_STATE); - } else { - info.setState(LOST_WORKER_STATE); - } + info.setState(workerState.toString()); break; case WORKER_USED_BYTES: info.setUsedBytes(mUsage.mUsedBytes); diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java b/core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java new file mode 100644 index 000000000000..0f5d2327ba28 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java @@ -0,0 +1,31 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.block.meta; + +/*** + * The worker state maintained by master. 
+ */ +public enum WorkerState { + LIVE("In Service"), + LOST("Out of Service"), + DECOMMISSIONED("Decommissioned"); + private final String mState; + + WorkerState(String s) { + mState = s; + } + + @Override + public String toString() { + return mState; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java index 7f10171bcb7a..c675588dae9a 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java @@ -220,6 +220,28 @@ public void detectLostWorkers() throws Exception { assertEquals(worker1, Iterables.getOnlyElement(info).getId()); } + @Test + public void decommissionWorker() throws Exception { + // Register a worker. + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + mBlockMaster.workerRegister(worker1, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 10L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + RegisterWorkerPOptions.getDefaultInstance()); + + // Decommission worker + mBlockMaster.decommissionWorker(NET_ADDRESS_1.getHost()); + + // Make sure the worker is decommissioned. 
+ int decommissionedCount = mBlockMaster.getDecommissionedWorkerCount(); + int liveCount = mBlockMaster.getWorkerCount(); + assertEquals(1, decommissionedCount); + assertEquals(0, liveCount); + } + @Test public void autoDeleteTimeoutWorker() throws Exception { diff --git a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java index 25f76f7ea64d..c855f79fb6c7 100644 --- a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java @@ -139,7 +139,7 @@ public void blockOperation() { @Test public void workerInfoGeneration() { WorkerInfo workerInfo = mInfo.generateWorkerInfo(GetWorkerReportOptions.WorkerInfoField.ALL, - true); + WorkerState.LIVE); assertEquals(mInfo.getId(), workerInfo.getId()); assertEquals(mInfo.getWorkerAddress(), workerInfo.getAddress()); assertEquals("In Service", workerInfo.getState()); diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index 22f649bcc96e..b7c3358232c6 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -119,6 +119,11 @@ message GetWorkerLostStoragePResponse { repeated WorkerLostStorageInfo workerLostStorageInfo = 1; } +message DecommissionWorkerPResponse {} +message DecommissionWorkerPOptions { + required string workerName = 1; +} + /** * This interface contains block master service endpoints for Alluxio clients. */ @@ -165,6 +170,11 @@ service BlockMasterClientService { * Returns a list of worker lost storage information */ rpc GetWorkerLostStorage(GetWorkerLostStoragePOptions) returns (GetWorkerLostStoragePResponse); + + /** + * Decommission the specific worker from Alluxio. 
+ */ + rpc DecommissionWorker(DecommissionWorkerPOptions) returns (DecommissionWorkerPResponse); } message TierList { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index cff9aab24eb9..473d6bf4d662 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -391,6 +391,19 @@ } ] }, + { + "name": "DecommissionWorkerPResponse" + }, + { + "name": "DecommissionWorkerPOptions", + "fields": [ + { + "id": 1, + "name": "workerName", + "type": "string" + } + ] + }, { "name": "TierList", "fields": [ @@ -831,6 +844,11 @@ "name": "GetWorkerLostStorage", "in_type": "GetWorkerLostStoragePOptions", "out_type": "GetWorkerLostStoragePResponse" + }, + { + "name": "DecommissionWorker", + "in_type": "DecommissionWorkerPOptions", + "out_type": "DecommissionWorkerPResponse" } ] }, diff --git a/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java b/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java new file mode 100644 index 000000000000..09fa2419e97e --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java @@ -0,0 +1,112 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.cli.fs.command; + +import alluxio.Constants; +import alluxio.client.block.BlockMasterClient; +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.file.FileSystemContext; +import alluxio.exception.AlluxioException; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.resource.CloseableResource; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * Decommission a specific worker, the decommissioned worker is not automatically + * shutdown and are not chosen for writing new replicas. + */ +public final class DecommissionWorkerCommand extends AbstractFileSystemCommand { + + private static final int DEFAULT_TIMEOUT = 10 * Constants.MINUTE_MS; + + private static final Option HOST_OPTION = + Option.builder("h") + .longOpt("host") + .required(true) // Host option is mandatory. + .hasArg(true) + .numberOfArgs(1) + .argName("host") + .desc("A worker host name, which is mandatory.") + .build(); + + /** + * Constructs a new instance to decommission the given worker from Alluxio. 
+ * @param fsContext the filesystem of Alluxio + */ + public DecommissionWorkerCommand(FileSystemContext fsContext) { + super(fsContext); + } + + @Override + public int run(CommandLine cl) throws AlluxioException, IOException { + String workerHost = cl.getOptionValue(HOST_OPTION.getLongOpt()); + + DecommissionWorkerPOptions options = + DecommissionWorkerPOptions.newBuilder() + .setWorkerName(workerHost).build(); + + List cachedWorkers = mFsContext.getCachedWorkers(); + + for (BlockWorkerInfo blockWorkerInfo : cachedWorkers) { + if (Objects.equals(blockWorkerInfo.getNetAddress().getHost(), workerHost)) { + try (CloseableResource blockMasterClient = + mFsContext.acquireBlockMasterClientResource()) { + long start = System.currentTimeMillis(); + blockMasterClient.get().decommissionWorker(options); + long duration = System.currentTimeMillis() - start; + System.out.printf("Decommission worker %s success, spend: %dms%n", + workerHost, duration); + } catch (IOException ie) { + throw new AlluxioException(ie.getMessage()); + } + return 0; + } + } + + System.out.println("Target worker is not found in Alluxio, please input another hostname.\n" + + "Available workers:"); + for (BlockWorkerInfo blockWorkerInfo : cachedWorkers) { + System.out.println("\t" + blockWorkerInfo.getNetAddress().getHost() + + ":" + blockWorkerInfo.getNetAddress().getRpcPort()); + } + return 0; + } + + @Override + public String getCommandName() { + return "decommissionWorker"; + } + + @Override + public Options getOptions() { + return new Options().addOption(HOST_OPTION); + } + + @Override + public String getUsage() { + return "decommissionWorker --h "; + } + + @Override + public String getDescription() { + return "Decommission a specific worker in the Alluxio cluster. The decommissioned" + + "worker is not shut down but will not accept new read/write operations. 
The ongoing " + + "operations will proceed until completion."; + } +} From a600f12b8f57ccf9ac3d37cafdd6aff157020b02 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Fri, 31 Mar 2023 10:59:06 +0800 Subject: [PATCH 216/334] Check filesystem for mount point ### What changes are proposed in this pull request? Fix #17158. pr-link: Alluxio/alluxio#17159 change-id: cid-341bd1e1a2a23530da4ab97161704b5b22c8f0ed --- .../alluxio/underfs/AbstractUfsManager.java | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java b/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java index 4de988ca2e1b..3478edfe5dab 100644 --- a/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java +++ b/core/server/common/src/main/java/alluxio/underfs/AbstractUfsManager.java @@ -160,19 +160,20 @@ private UnderFileSystem getOrAddWithRecorder(AlluxioURI ufsUri, if (useManagedBlocking) { fs = new ManagedBlockingUfsForwarder(fs); } - - if (mUnderFileSystemMap.putIfAbsent(key, fs) != null) { - // This shouldn't occur unless our synchronization is incorrect - LOG.warn("UFS already existed in UFS manager"); - } mCloser.register(fs); try { connectUfs(fs); - } catch (IOException e) { + tryUseFileSystem(fs, ufsUri.getPath()); + } catch (Exception e) { String message = String.format( "Failed to perform initial connect to UFS %s: %s", ufsUri, e); recorder.record(message); LOG.warn(message); + throw new RuntimeException(e); + } + if (mUnderFileSystemMap.putIfAbsent(key, fs) != null) { + // This shouldn't occur unless our synchronization is incorrect + LOG.warn("UFS already existed in UFS manager"); } return fs; } @@ -185,6 +186,17 @@ private UnderFileSystem getOrAddWithRecorder(AlluxioURI ufsUri, */ protected abstract void connectUfs(UnderFileSystem fs) throws IOException; + /** + * To check whether the filesystem is available by calling exists. 
+ * + * @param fs the filesystem + * @param ufsPath the UFS path + * @throws Exception + */ + private void tryUseFileSystem(UnderFileSystem fs, String ufsPath) throws Exception { + fs.exists(ufsPath); + } + @Override public void addMount(long mountId, final AlluxioURI ufsUri, final UnderFileSystemConfiguration ufsConf) { From 875f3f22329a0dd0991555913f96fe39ca953d66 Mon Sep 17 00:00:00 2001 From: secfree Date: Fri, 31 Mar 2023 15:08:22 +0800 Subject: [PATCH 217/334] Avoid getting meta sync lock if no need ### What changes are proposed in this pull request? Avoid getting meta sync lock if no need. ### Why are the changes needed? After backporting #16241, "ls /" encountered the following exception ``` 2023-03-30 14:00:51,657 ERROR FileSystemMasterClientServiceHandler - Exit (Error): ListStatus: request=path: "/" options { loadMetadataType: ONCE commonOptions { syncIntervalMs: 86400000 ttl: -1 ttlAction: DELETE } recursive: false loadMetadataOnly: false } java.lang.RuntimeException: Call cancelled by trackers: GRPC_CLIENT_TRACKER at alluxio.master.file.RpcContext.throwIfCancelled(RpcContext.java:107) at alluxio.master.file.InodeSyncStream.sync(InodeSyncStream.java:322) at alluxio.master.file.DefaultFileSystemMaster.syncMetadata(DefaultFileSystemMaster.java:3816) at alluxio.master.file.DefaultFileSystemMaster.listStatus(DefaultFileSystemMaster.java:1042) ... ``` We found that "ls /" was waiting for the WRITE lock of "/" in MetadataSyncLockManager even it did not need to sync metadata. As other clients were "ls" other paths and they hold READ lock on "/", "ls /" got stuck. ### Does this PR introduce any user facing changes? 
NO pr-link: Alluxio/alluxio#17172 change-id: cid-405084e218b150b4ffcce08294b30b3dd867a309 --- .../java/alluxio/master/file/InodeSyncStream.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index ff0a6bcf8ec6..32511a364296 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -405,6 +405,12 @@ public InodeSyncStream(LockingScheme rootScheme, DefaultFileSystemMaster fsMaste * @return SyncStatus object */ public SyncStatus sync() throws AccessControlException, InvalidPathException { + LOG.debug("Running InodeSyncStream on path {}, with status {}, and force sync {}", + mRootScheme.getPath(), mRootScheme.shouldSync(), mForceSync); + if (!mRootScheme.shouldSync().isShouldSync() && !mForceSync) { + DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); + return SyncStatus.NOT_NEEDED; + } if (!mDedupConcurrentSync) { return syncInternal(); } @@ -430,12 +436,6 @@ private SyncStatus syncInternal() throws int failedSyncPathCount = 0; int skippedSyncPathCount = 0; int stopNum = -1; // stop syncing when we've processed this many paths. 
-1 for infinite - LOG.debug("Running InodeSyncStream on path {}, with status {}, and force sync {}", - mRootScheme.getPath(), mRootScheme.shouldSync(), mForceSync); - if (!mRootScheme.shouldSync().isShouldSync() && !mForceSync) { - DefaultFileSystemMaster.Metrics.INODE_SYNC_STREAM_SKIPPED.inc(); - return SyncStatus.NOT_NEEDED; - } if (mDedupConcurrentSync && mRootScheme.shouldSync() != SyncCheck.SHOULD_SYNC) { /* * If a concurrent sync on the same path is successful after this sync had already From db9a7637f7d6db72422647b5f3e28c8a485e9024 Mon Sep 17 00:00:00 2001 From: Wang Xiaoyong Date: Fri, 31 Mar 2023 15:41:46 +0800 Subject: [PATCH 218/334] Fix the issue where runTest ignores arguments ### What changes are proposed in this pull request? Fix the issue that TestRunner related parameters have no effect. ### Why are the changes needed? Fix the issue that TestRunner related parameters have no effect. #17066 ### Does this PR introduce any user facing changes? no pr-link: Alluxio/alluxio#17068 change-id: cid-2ae4ffc5ba0ba30c99da7247d92367028d22499b --- shell/src/main/java/alluxio/cli/TestRunner.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/TestRunner.java b/shell/src/main/java/alluxio/cli/TestRunner.java index 8f1906ecc11d..c96d58cd0af2 100644 --- a/shell/src/main/java/alluxio/cli/TestRunner.java +++ b/shell/src/main/java/alluxio/cli/TestRunner.java @@ -83,11 +83,12 @@ private TestRunner() {} // prevent instantiation /** * Console program that validates the configuration. 
* - * @param args there are no arguments needed + * @param args array of arguments given by the user's input from the terminal */ public static void main(String[] args) throws Exception { TestRunner runner = new TestRunner(); JCommander jCommander = new JCommander(runner); + jCommander.parse(args); jCommander.setProgramName("TestRunner"); if (runner.mHelp) { jCommander.usage(); From aee3c5cb960b2e472f4fcdb86e731ca3b91d2f8c Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Fri, 31 Mar 2023 22:34:36 +0800 Subject: [PATCH 219/334] Support executing runTests on specific workers ### What changes are proposed in this pull request? Support executing the runTests command on specific workers. e.g. ./alluxio runTests --workers worker1:29998,worker2:29998 If the workers are specified, runTests will be executed for each worker with writes and reads to go a dedicated worker. Screen Shot 2023-03-30 at 3 45 20 PM Screen Shot 2023-03-30 at 3 46 44 PM ### Why are the changes needed? So that we can verify if workers are up ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#17170 change-id: cid-08eb445461ff6248def8164cfa529c070b8e5e9a --- .../block/policy/SpecificHostPolicy.java | 19 ++- .../client/file/options/InStreamOptions.java | 9 +- .../client/file/options/OutStreamOptions.java | 6 + .../main/proto/grpc/file_system_master.proto | 7 + core/transport/src/main/proto/proto.lock | 10 ++ .../cli/BasicNonByteBufferOperations.java | 31 +++- .../java/alluxio/cli/BasicOperations.java | 24 ++- .../src/main/java/alluxio/cli/TestRunner.java | 147 +++++++++++++++--- 8 files changed, 216 insertions(+), 37 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java b/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java index befb3248142b..30224aaf1c1e 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java +++ b/core/client/fs/src/main/java/alluxio/client/block/policy/SpecificHostPolicy.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; import java.util.Optional; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -31,6 +32,8 @@ @ThreadSafe public final class SpecificHostPolicy implements BlockLocationPolicy { private final String mHostname; + @Nullable + private final Integer mRpcPort; /** * Constructs a new {@link SpecificHostPolicy} @@ -39,7 +42,7 @@ public final class SpecificHostPolicy implements BlockLocationPolicy { * @param conf Alluxio configuration */ public SpecificHostPolicy(AlluxioConfiguration conf) { - this(conf.getString(PropertyKey.WORKER_HOSTNAME)); + this(conf.getString(PropertyKey.WORKER_HOSTNAME), conf.getInt(PropertyKey.WORKER_RPC_PORT)); } /** @@ -48,7 +51,18 @@ public SpecificHostPolicy(AlluxioConfiguration conf) { * @param hostname the name of the host */ public SpecificHostPolicy(String hostname) { + this(hostname, null); + } + + /** + * Constructs the policy with the hostname and port. 
+ * + * @param hostname the name of the host + * @param rpcPort the rpc port + */ + public SpecificHostPolicy(String hostname, @Nullable Integer rpcPort) { mHostname = Preconditions.checkNotNull(hostname, "hostname"); + mRpcPort = rpcPort; } /** @@ -59,7 +73,8 @@ public SpecificHostPolicy(String hostname) { public Optional getWorker(GetWorkerOptions options) { // find the first worker matching the host name for (BlockWorkerInfo info : options.getBlockWorkerInfos()) { - if (info.getNetAddress().getHost().equals(mHostname)) { + if (info.getNetAddress().getHost().equals(mHostname) + && (mRpcPort == null || info.getNetAddress().getRpcPort() == mRpcPort)) { return Optional.of(info.getNetAddress()); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java b/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java index 31b6c8f2b870..711722831622 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/options/InStreamOptions.java @@ -13,6 +13,7 @@ import alluxio.client.ReadType; import alluxio.client.block.policy.BlockLocationPolicy; +import alluxio.client.block.policy.SpecificHostPolicy; import alluxio.client.file.FileSystemContext; import alluxio.client.file.URIStatus; import alluxio.conf.AlluxioConfiguration; @@ -91,7 +92,13 @@ public InStreamOptions(URIStatus status, @Nonnull OpenFilePOptions options, mStatus = status; mProtoOptions = openOptions; - mUfsReadLocationPolicy = context.getReadBlockLocationPolicy(alluxioConf); + if (options.hasUfsReadWorkerLocation()) { + int port = options.getUfsReadWorkerLocation().getRpcPort(); + mUfsReadLocationPolicy = new SpecificHostPolicy( + options.getUfsReadWorkerLocation().getHost(), port == 0 ? 
null : port); + } else { + mUfsReadLocationPolicy = context.getReadBlockLocationPolicy(alluxioConf); + } mPositionShort = false; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java b/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java index 028ecda49ff5..48d2cf932c9b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/file/options/OutStreamOptions.java @@ -16,6 +16,7 @@ import alluxio.client.UnderStorageType; import alluxio.client.WriteType; import alluxio.client.block.policy.BlockLocationPolicy; +import alluxio.client.block.policy.SpecificHostPolicy; import alluxio.client.file.FileSystemContext; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; @@ -113,6 +114,11 @@ public OutStreamOptions(CreateFilePOptions options, FileSystemContext context, if (options.hasWriteType()) { mWriteType = WriteType.fromProto(options.getWriteType()); } + if (options.hasWorkerLocation()) { + int port = options.getWorkerLocation().getRpcPort(); + mLocationPolicy = new SpecificHostPolicy( + options.getWorkerLocation().getHost(), port == 0 ? null : port); + } } private OutStreamOptions(FileSystemContext context, AlluxioConfiguration alluxioConf) { diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index c6a6c4099359..ad680ad10ae5 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -95,6 +95,11 @@ message OpenFilePOptions { optional int32 maxUfsReadConcurrency = 2; optional FileSystemMasterCommonPOptions commonOptions = 3; optional bool updateLastAccessTime = 4 [default = true]; + // If specified and the blocks are not cached in any worker, + // the data will be read and cached to the certain worker. 
+ // If the blocks have been cached in some alluxio workers, + // this field will be ignored. + optional grpc.WorkerNetAddress ufsReadWorkerLocation = 15; } // XAttrPropagationStrategy controls the behaviour for assigning xAttr @@ -141,6 +146,8 @@ message CreateFilePOptions { optional XAttrPropagationStrategy xattrPropStrat = 12 [default = NEW_PATHS]; optional bool overwrite = 13; optional bool checkS3BucketPath = 14; + // If specified, the data will be written to the certain worker + optional grpc.WorkerNetAddress workerLocation = 15; } message CreateFilePRequest { /** the path of the file */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 473d6bf4d662..29ae6ba37caf 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2322,6 +2322,11 @@ "value": "true" } ] + }, + { + "id": 15, + "name": "ufsReadWorkerLocation", + "type": "grpc.WorkerNetAddress" } ] }, @@ -2482,6 +2487,11 @@ "id": 14, "name": "checkS3BucketPath", "type": "bool" + }, + { + "id": 15, + "name": "workerLocation", + "type": "grpc.WorkerNetAddress" } ], "maps": [ diff --git a/shell/src/main/java/alluxio/cli/BasicNonByteBufferOperations.java b/shell/src/main/java/alluxio/cli/BasicNonByteBufferOperations.java index 80159edd05c8..5d11a591af10 100644 --- a/shell/src/main/java/alluxio/cli/BasicNonByteBufferOperations.java +++ b/shell/src/main/java/alluxio/cli/BasicNonByteBufferOperations.java @@ -22,6 +22,7 @@ import alluxio.exception.FileAlreadyExistsException; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.OpenFilePOptions; +import alluxio.grpc.WorkerNetAddress; import alluxio.util.CommonUtils; import alluxio.util.FormatUtils; @@ -32,6 +33,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.util.concurrent.Callable; +import javax.annotation.Nullable; /** * Basic example of using the {@link FileSystem} for writing to and reading from files. 
@@ -55,6 +57,7 @@ public final class BasicNonByteBufferOperations implements Callable { private final boolean mDeleteIfExists; private final int mLength; private final FileSystemContext mFsContext; + private final @Nullable WorkerNetAddress mWorkerNetAddress; /** * @param filePath the path for the files @@ -63,15 +66,19 @@ public final class BasicNonByteBufferOperations implements Callable { * @param deleteIfExists delete files if they already exist * @param length the number of files * @param fsContext the {@link FileSystemContext} to use for client operations + * @param workerNetAddress if not null, the worker address the file data will be written into */ - public BasicNonByteBufferOperations(AlluxioURI filePath, ReadType readType, WriteType writeType, - boolean deleteIfExists, int length, FileSystemContext fsContext) { + public BasicNonByteBufferOperations( + AlluxioURI filePath, ReadType readType, WriteType writeType, + boolean deleteIfExists, int length, FileSystemContext fsContext, + @Nullable WorkerNetAddress workerNetAddress) { mFilePath = filePath; mWriteType = writeType; mReadType = readType; mDeleteIfExists = deleteIfExists; mLength = length; mFsContext = fsContext; + mWorkerNetAddress = workerNetAddress; } @Override @@ -95,8 +102,15 @@ private void write(FileSystem alluxioClient) throws IOException, AlluxioExceptio private FileOutStream createFile(FileSystem fileSystem, AlluxioURI filePath, boolean deleteIfExists) throws IOException, AlluxioException { - CreateFilePOptions options = CreateFilePOptions.newBuilder().setWriteType(mWriteType.toProto()) - .setRecursive(true).build(); + CreateFilePOptions.Builder optionsBuilder = + CreateFilePOptions.newBuilder().setWriteType(mWriteType.toProto()) + .setRecursive(true); + if (mWorkerNetAddress != null) { + optionsBuilder.setWorkerLocation(mWorkerNetAddress); + optionsBuilder.setReplicationMax(1); + optionsBuilder.setReplicationMin(1); + } + CreateFilePOptions options = optionsBuilder.build(); if 
(!fileSystem.exists(filePath)) { // file doesn't exist yet, so create it return fileSystem.createFile(filePath, options); @@ -110,8 +124,13 @@ private FileOutStream createFile(FileSystem fileSystem, AlluxioURI filePath, } private boolean read(FileSystem alluxioClient) throws IOException, AlluxioException { - OpenFilePOptions options = - OpenFilePOptions.newBuilder().setReadType(mReadType.toProto()).build(); + OpenFilePOptions.Builder optionsBuilder = + OpenFilePOptions.newBuilder().setReadType(mReadType.toProto()); + if (mWorkerNetAddress != null) { + optionsBuilder.setUfsReadWorkerLocation(mWorkerNetAddress); + } + OpenFilePOptions options = optionsBuilder.build(); + boolean pass = true; long startTimeMs = CommonUtils.getCurrentMs(); try (DataInputStream input = new DataInputStream(alluxioClient.openFile(mFilePath, options))) { diff --git a/shell/src/main/java/alluxio/cli/BasicOperations.java b/shell/src/main/java/alluxio/cli/BasicOperations.java index 006064b2f32c..b1731920e923 100644 --- a/shell/src/main/java/alluxio/cli/BasicOperations.java +++ b/shell/src/main/java/alluxio/cli/BasicOperations.java @@ -22,6 +22,7 @@ import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.ReadPType; +import alluxio.grpc.WorkerNetAddress; import alluxio.grpc.WritePType; import alluxio.util.CommonUtils; import alluxio.util.FormatUtils; @@ -33,6 +34,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.concurrent.Callable; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -54,13 +56,25 @@ public class BasicOperations implements Callable { * @param readType the {@link ReadPType} * @param writeType the {@link WritePType} * @param fsContext the {@link FileSystemContext } to use for client operations + * @param workerNetAddress if not null, the worker address the file data will be written into */ - public BasicOperations(AlluxioURI filePath, ReadType readType, WriteType writeType, - 
FileSystemContext fsContext) { + public BasicOperations( + AlluxioURI filePath, ReadType readType, WriteType writeType, + FileSystemContext fsContext, @Nullable WorkerNetAddress workerNetAddress) { mFilePath = filePath; - mReadOptions = OpenFilePOptions.newBuilder().setReadType(readType.toProto()).build(); - mWriteOptions = CreateFilePOptions.newBuilder().setWriteType(writeType.toProto()) - .setRecursive(true).build(); + OpenFilePOptions.Builder readOptionsBuilder = + OpenFilePOptions.newBuilder().setReadType(readType.toProto()); + CreateFilePOptions.Builder writeOptionsBuilder = + CreateFilePOptions.newBuilder().setWriteType(writeType.toProto()) + .setRecursive(true); + if (workerNetAddress != null) { + writeOptionsBuilder.setWorkerLocation(workerNetAddress); + writeOptionsBuilder.setReplicationMax(1); + writeOptionsBuilder.setReplicationMin(1); + readOptionsBuilder.setUfsReadWorkerLocation(workerNetAddress); + } + mReadOptions = readOptionsBuilder.build(); + mWriteOptions = writeOptionsBuilder.build(); mFsContext = fsContext; } diff --git a/shell/src/main/java/alluxio/cli/TestRunner.java b/shell/src/main/java/alluxio/cli/TestRunner.java index c96d58cd0af2..406e99763149 100644 --- a/shell/src/main/java/alluxio/cli/TestRunner.java +++ b/shell/src/main/java/alluxio/cli/TestRunner.java @@ -12,19 +12,28 @@ package alluxio.cli; import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.annotation.SuppressFBWarnings; import alluxio.client.ReadType; import alluxio.client.WriteType; import alluxio.client.file.FileSystem; import alluxio.client.file.FileSystemContext; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.grpc.DeletePOptions; +import alluxio.grpc.WorkerNetAddress; import alluxio.util.io.PathUtils; +import com.beust.jcommander.IStringConverter; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import com.beust.jcommander.internal.Lists; +import com.google.common.base.Preconditions; import 
java.util.Arrays; +import java.util.HashMap; import java.util.List; +import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; /** @@ -61,6 +70,14 @@ public final class TestRunner { + "THROUGH, ASYNC_THROUGH. By default all writeTypes are tested.") private String mWriteType; + @Parameter(names = {"--workers", "--worker"}, + description = "Alluxio worker addresses to run tests on. " + + "If not specified, random ones will be used.", + converter = WorkerAddressConverter.class) + @Nullable + @SuppressFBWarnings(value = "UWF_NULL_FIELD", justification = "Injected field") + private List mWorkerAddresses = null; + /** * The operation types to test. */ @@ -90,6 +107,7 @@ public static void main(String[] args) throws Exception { JCommander jCommander = new JCommander(runner); jCommander.parse(args); jCommander.setProgramName("TestRunner"); + jCommander.parse(args); if (runner.mHelp) { jCommander.usage(); return; @@ -105,6 +123,10 @@ public static void main(String[] args) throws Exception { * @return the number of failed tests */ private int runTests() throws Exception { + if (mWorkerAddresses != null) { + Configuration.set(PropertyKey.USER_FILE_PASSIVE_CACHE_ENABLED, false); + } + mDirectory = PathUtils.concatPath(mDirectory, TEST_DIRECTORY_NAME); AlluxioURI testDir = new AlluxioURI(mDirectory); @@ -123,47 +145,126 @@ private int runTests() throws Exception { List operations = mOperation == null ? 
Lists.newArrayList(OperationType.values()) : Lists.newArrayList(OperationType.valueOf(mOperation)); - for (ReadType readType : readTypes) { - for (WriteType writeType : writeTypes) { - for (OperationType opType : operations) { - System.out.println(String.format("runTest --operation %s --readType %s --writeType %s", - opType, readType, writeType)); - failed += runTest(opType, readType, writeType, fsContext); + if (mWorkerAddresses == null) { + for (ReadType readType : readTypes) { + for (WriteType writeType : writeTypes) { + for (OperationType opType : operations) { + System.out.println(String.format("runTest --operation %s --readType %s --writeType %s", + opType, readType, writeType)); + failed += runTest(opType, readType, writeType, fsContext, null); + } } } - } - if (failed > 0) { - System.out.println("Number of failed tests: " + failed); + if (failed > 0) { + System.out.println("Number of failed tests: " + failed); + } + } else { + // If workers are specified, the test will iterate through all workers and + // for each worker: + // 1. Create a file + // 2. Write the blocks of test files into a specific worker using SpecificHostPolicy + // 3. Open the file to read + // 4. If blocks are cached on the worker (MUST_CACHE/CACHE_THROUGH/ASYNC_THROUGH), + // then data will be read from that specific worker as there is only one copy of data. + // If blocks are not cached on the worker (THROUGH), + // then data will be loaded from UFS into that specific worker, by setting the + // ufsReadWorkerLocation field in InStreamOptions. + // In this way, we made sure that a worker works normally on both reads and writes.
+ HashMap failedTestWorkers = new HashMap<>(); + boolean hasFailedWorkers = false; + for (WorkerNetAddress workerNetAddress : mWorkerAddresses) { + System.out.println("Running test for worker:" + getWorkerAddressString(workerNetAddress)); + for (ReadType readType : readTypes) { + for (WriteType writeType : writeTypes) { + for (OperationType opType : operations) { + System.out.printf("[%s] runTest --operation %s --readType %s --writeType %s%n", + getWorkerAddressString(workerNetAddress), opType, readType, writeType); + failed += runTest(opType, readType, writeType, fsContext, workerNetAddress); + failedTestWorkers.put( + workerNetAddress, failedTestWorkers.getOrDefault(workerNetAddress, 0) + failed); + if (failed != 0) { + hasFailedWorkers = true; + } + } + } + } + } + if (!hasFailedWorkers) { + System.out.println( + Constants.ANSI_GREEN + "All workers passed tests!" + Constants.ANSI_RESET); + } else { + System.out.println( + Constants.ANSI_RED + "Some workers failed tests!" + Constants.ANSI_RESET); + failedTestWorkers.forEach((k, v) -> { + if (v > 0) { + System.out.printf( + "%sWorker %s failed %s tests %s%n", + Constants.ANSI_RED, getWorkerAddressString(k), 4, Constants.ANSI_RESET); + } + }); + } } return failed; } - /** - * Runs a single test given operation, read and write type. - * - * @param opType operation type - * @param readType read type - * @param writeType write type - * @return 0 on success, 1 on failure - */ - private int runTest(OperationType opType, ReadType readType, WriteType writeType, - FileSystemContext fsContext) { - AlluxioURI filePath = - new AlluxioURI(String.format("%s/%s_%s_%s", mDirectory, opType, readType, writeType)); + /** + * Runs a single test given operation, read and write type. 
+ * + * @param opType operation type + * @param readType read type + * @param writeType write type + * @return 0 on success, 1 on failure + */ + private int runTest( + OperationType opType, ReadType readType, WriteType writeType, + FileSystemContext fsContext, @Nullable WorkerNetAddress workerAddress) { + final AlluxioURI filePath; + if (workerAddress == null) { + filePath = new AlluxioURI( + String.format("%s/%s_%s_%s", mDirectory, opType, readType, writeType)); + } else { + String workerAddressString = getWorkerAddressString(workerAddress); + filePath = new AlluxioURI( + String.format("%s/%s/%s_%s_%s", mDirectory, workerAddressString, opType, readType, + writeType)); + } boolean result = true; switch (opType) { case BASIC: result = RunTestUtils.runExample( - new BasicOperations(filePath, readType, writeType, fsContext)); + new BasicOperations(filePath, readType, writeType, fsContext, workerAddress)); break; case BASIC_NON_BYTE_BUFFER: result = RunTestUtils.runExample( - new BasicNonByteBufferOperations(filePath, readType, writeType, true, 20, fsContext)); + new BasicNonByteBufferOperations( + filePath, readType, writeType, true, 20, fsContext, workerAddress)); break; default: System.out.println("Unrecognized operation type " + opType); } return result ? 0 : 1; } + + /** + * Parses worker address param. + */ + public static class WorkerAddressConverter implements IStringConverter { + @Override + public WorkerNetAddress convert(String s) { + if (s.contains(":")) { + String[] components = s.split(":"); + Preconditions.checkState(components.length == 2); + return WorkerNetAddress.newBuilder().setHost(components[0]) + .setRpcPort(Integer.parseInt(components[1])).build(); + } else { + return WorkerNetAddress.newBuilder().setHost(s).build(); + } + } + } + + private String getWorkerAddressString(WorkerNetAddress workerAddress) { + return workerAddress.getRpcPort() == 0 ? 
workerAddress.getHost() : + workerAddress.getHost() + "_" + workerAddress.getRpcPort(); + } } From 4435584bf15a370fe0a0fbf9350bc45b6fd922b3 Mon Sep 17 00:00:00 2001 From: jja725 Date: Mon, 3 Apr 2023 13:34:42 -0700 Subject: [PATCH 220/334] Fix double counting job success when restore job from journal ### What changes are proposed in this pull request? When we restore a job from the journal we would call setJobStatus and would double count job that already finished successfully ### Why are the changes needed? fix bug ### Does this PR introduce any user facing changes? na pr-link: Alluxio/alluxio#17139 change-id: cid-2e9fd04421d5069350396fb92a14577bfb949fc3 --- .../src/main/java/alluxio/master/job/AbstractJob.java | 3 --- .../master/src/main/java/alluxio/master/job/LoadJob.java | 6 ++++++ .../src/main/java/alluxio/master/scheduler/Scheduler.java | 2 +- job/common/src/main/java/alluxio/scheduler/job/Job.java | 5 +++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java index ee75c5fe9bb3..eb3e31bff832 100644 --- a/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java +++ b/core/server/master/src/main/java/alluxio/master/job/AbstractJob.java @@ -95,9 +95,6 @@ public void setJobState(JobState state) { if (!isRunning()) { mEndTime = OptionalLong.of(System.currentTimeMillis()); } - if (state == JobState.SUCCEEDED) { - LoadJob.JOB_LOAD_SUCCESS.inc(); - } } @Override diff --git a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java index a7f8fb16dae9..36f2b277939e 100644 --- a/core/server/master/src/main/java/alluxio/master/job/LoadJob.java +++ b/core/server/master/src/main/java/alluxio/master/job/LoadJob.java @@ -208,6 +208,12 @@ public void failJob(AlluxioRuntimeException reason) { JOB_LOAD_FAIL.inc(); } + @Override + public void 
setJobSuccess() { + setJobState(JobState.SUCCEEDED); + JOB_LOAD_SUCCESS.inc(); + } + /** * Add bytes to total loaded bytes. * @param bytes bytes to be added to total diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java index ee548d8bd436..dbdd599a475d 100644 --- a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java +++ b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java @@ -334,7 +334,7 @@ private void processJob(Job job, Set runningWorkers) { } else { if (job.isHealthy()) { - job.setJobState(JobState.SUCCEEDED); + job.setJobSuccess(); } else { job.failJob(new InternalRuntimeException("Job failed because it's not healthy.")); diff --git a/job/common/src/main/java/alluxio/scheduler/job/Job.java b/job/common/src/main/java/alluxio/scheduler/job/Job.java index 4b9f5fdd4483..49a29f88dbbd 100644 --- a/job/common/src/main/java/alluxio/scheduler/job/Job.java +++ b/job/common/src/main/java/alluxio/scheduler/job/Job.java @@ -67,6 +67,11 @@ public interface Job> { */ void failJob(AlluxioRuntimeException reason); + /** + * set job as success. + */ + void setJobSuccess(); + /** * Get job progress. * @param format progress report format From 9f758887a58a94ae9ab195dfc1d8b39ea5d51e75 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Tue, 4 Apr 2023 10:25:52 +0800 Subject: [PATCH 221/334] Add usage info for omit-mount-info ### What changes are proposed in this pull request? Add usage info for omit-mount-info. ### Why are the changes needed? Show usage for omit-mount-info parameter. 
pr-link: Alluxio/alluxio#17165 change-id: cid-4c8c41999a1c307d7a6b2f5209166751526dddb6 --- docs/en/operation/User-CLI.md | 1 + shell/src/main/java/alluxio/cli/fs/command/LsCommand.java | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index 103b23b78a67..80fd1c16d8ab 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -1203,6 +1203,7 @@ By default, it loads metadata only at the first time at which a directory is lis * `-r` reverses the sorting order. * `--timestamp` display the timestamp of the given option. Possible values are creationTime, lastModificationTime, and lastAccessTime. The default option is lastModificationTime. +* `-m` option excludes mount point related information. For example, `ls` can be used to browse the file system. diff --git a/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java b/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java index 6b6cfc7d228d..ebc542fff2ab 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/LsCommand.java @@ -332,7 +332,8 @@ public int run(CommandLine cl) throws AlluxioException, IOException { @Override public String getUsage() { - return "ls [-d|-f|-p|-R/--recursive|-h|--sort=option|--timestamp=option|-r] ..."; + return "ls [-d|-f|-p|-R/--recursive|-h|--sort=option|--timestamp=option|-r" + + "|-m/--omit-mount-info] ..."; } @Override From 922b2a83bbca27108b9750d1ab1f79cbe6669834 Mon Sep 17 00:00:00 2001 From: humengyu Date: Tue, 4 Apr 2023 10:26:20 +0800 Subject: [PATCH 222/334] [SMALLFIX] Delete duplicate words ### What changes are proposed in this pull request? Delete duplicate words. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17178 change-id: cid-06ee4c54d254030e5cb2f141e81fe23a9fe43326 --- .../src/main/java/alluxio/worker/block/CacheRequestManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java index 911954bd27c5..464feabc2356 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java @@ -92,7 +92,7 @@ public void submitRequest(CacheRequest request) long blockId = request.getBlockId(); boolean async = request.getAsync(); if (mActiveCacheRequests.putIfAbsent(blockId, request) != null) { - // This block is already planned and just just return. + // This block is already planned and just return. if (async) { LOG.debug("request already planned: {}", request); } else { From aeae6ae4dae14a403e8123f77053f1af030f566d Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Tue, 4 Apr 2023 10:26:53 +0800 Subject: [PATCH 223/334] [DOCFIX]Fix some bugs related to Caching.md ### What changes are proposed in this pull request? The purpose of this PR is to fix some bugs related to Caching.md ### Why are the changes needed? Fixing these errors will make Caching.md look cleaner. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17187 change-id: cid-877b353109edfe228b434f2c594cb4db7be9428e --- docs/cn/core-services/Caching.md | 8 ++++++-- docs/en/core-services/Caching.md | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/cn/core-services/Caching.md b/docs/cn/core-services/Caching.md index 74279ffa46b6..643a0f3a5329 100644 --- a/docs/cn/core-services/Caching.md +++ b/docs/cn/core-services/Caching.md @@ -334,7 +334,7 @@ Alluxio支持命名空间中每个文件和目录的"生存时间(TTL)"设置。 则TTL功能可用于明确刷新旧数据,从而为新文件释放缓存空间。 Alluxio具有与每个文件或目录关联的TTL属性。这些属性将保存为 -日志的一部分,所以集群重新后也能持久保持。活跃master节点负责 +日志的一部分,所以集群重启后也能持久保持。活跃master节点负责 当Alluxio提供服务时将元数据保存在内存中。在内部,master运行一个后台 线程,该线程定期检查文件是否已达到其TTL到期时间。 @@ -478,8 +478,12 @@ Alluxio cluster summary: Started: 09-28-2018 12:52:09:486 Uptime: 0 day(s), 0 hour(s), 0 minute(s), and 26 second(s) Version: 2.0.0 - Safe Mode: true + Safe Mode: false Zookeeper Enabled: false + Raft-based Journal: true + Raft Journal Addresses: + localhost:19200 + localhost:19201 Live Workers: 1 Lost Workers: 0 Total Capacity: 10.67GB diff --git a/docs/en/core-services/Caching.md b/docs/en/core-services/Caching.md index 2b008e00c266..b381dae1aa03 100644 --- a/docs/en/core-services/Caching.md +++ b/docs/en/core-services/Caching.md @@ -657,8 +657,12 @@ Alluxio cluster summary: Started: 09-28-2018 12:52:09:486 Uptime: 0 day(s), 0 hour(s), 0 minute(s), and 26 second(s) Version: 2.0.0 - Safe Mode: true + Safe Mode: false Zookeeper Enabled: false + Raft-based Journal: true + Raft Journal Addresses: + localhost:19200 + localhost:19201 Live Workers: 1 Lost Workers: 0 Total Capacity: 10.67GB From d79aa75c1c8e3d42f52ceb88f287021d8673ae27 Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 5 Apr 2023 02:08:41 +0800 Subject: [PATCH 224/334] Print exception stack in s3 proxy ### What changes are proposed in this pull request? Print exception stack for s3 proxy. ### Why are the changes needed? 
Sometimes, the message of exception is null, and we need print stack to find where the exception occurred. Before add exception stack: ``` 2023-03-24 14:47:50,164 INFO ProxyWebServer - [ACCESSLOG] ListObjects Request:Request[GET //localhost:39999/api/v1/s3/s3/?prefix=user%2Fhumengyu%2Fword1&encoding-type=url]@54efde7a - Status:500 - ContentLength:None - Elapsed(ms):561 2023-03-24 14:47:50,325 WARN S3RestUtils - Error invoking REST endpoint for s3: null 2023-03-24 14:47:50,328 INFO ProxyWebServer - [ACCESSLOG] ListObjects Request:Request[GET //localhost:39999/api/v1/s3/s3/?prefix=user%2Fhumengyu%2Fword1&encoding-type=url]@54efde7a - Status:500 - ContentLength:None - Elapsed(ms):18 2023-03-24 14:47:50,469 WARN S3RestUtils - Error invoking REST endpoint for s3: null 2023-03-24 14:47:50,472 INFO ProxyWebServer - [ACCESSLOG] ListObjects Request:Request[GET //localhost:39999/api/v1/s3/s3/?prefix=user%2Fhumengyu%2Fword1&encoding-type=url]@54efde7a - Status:500 - ContentLength:None - Elapsed(ms):16 2023-03-24 14:47:50,828 WARN S3RestUtils - Error invoking REST endpoint for s3: null 2023-03-24 14:47:50,830 INFO ProxyWebServer - [ACCESSLOG] ListObjects Request:Request[GET //localhost:39999/api/v1/s3/s3/?prefix=user%2Fhumengyu%2Fword1&encoding-type=url]@54efde7a - Status:500 - ContentLength:None - Elapsed(ms):14 ``` After add exception stack: ``` 2023-03-24 14:54:41,470 WARN S3RestUtils - Error invoking REST endpoint for s3: null alluxio.proxy.s3.S3Exception at alluxio.proxy.s3.S3RestUtils.toBucketS3Exception(S3RestUtils.java:214) at alluxio.proxy.s3.S3BucketTask$ListObjectsTask.lambda$continueTask$1(S3BucketTask.java:335) at alluxio.proxy.s3.S3RestUtils.call(S3RestUtils.java:107) at alluxio.proxy.s3.S3BucketTask$ListObjectsTask.continueTask(S3BucketTask.java:264) at alluxio.proxy.s3.S3RequestServlet.serveRequest(S3RequestServlet.java:124) at alluxio.proxy.s3.S3RequestServlet.lambda$service$0(S3RequestServlet.java:93) at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: alluxio.exception.AlluxioException at alluxio.exception.status.AlluxioStatusException.toAlluxioException(AlluxioStatusException.java:110) at alluxio.client.file.BaseFileSystem.wrapAndThrowAlluxioStatusException(BaseFileSystem.java:648) at alluxio.client.file.BaseFileSystem.rpc(BaseFileSystem.java:625) at alluxio.client.file.BaseFileSystem.exists(BaseFileSystem.java:207) at alluxio.client.file.DelegatingFileSystem.exists(DelegatingFileSystem.java:99) at alluxio.client.file.FileSystemCache$InstanceCachingFileSystem.exists(FileSystemCache.java:251) at alluxio.client.file.FileSystem.exists(FileSystem.java:302) at alluxio.proxy.s3.S3BucketTask$ListObjectsTask.lambda$continueTask$1(S3BucketTask.java:315) ... 9 more Caused by: alluxio.exception.status.UnknownException at alluxio.exception.status.AlluxioStatusException.from(AlluxioStatusException.java:174) at alluxio.exception.status.AlluxioStatusException.fromStatusRuntimeException(AlluxioStatusException.java:215) at alluxio.AbstractClient.retryRPCInternal(AbstractClient.java:486) at alluxio.AbstractClient.retryRPC(AbstractClient.java:450) at alluxio.AbstractClient.retryRPC(AbstractClient.java:439) at alluxio.client.file.RetryHandlingFileSystemMasterClient.exists(RetryHandlingFileSystemMasterClient.java:192) at alluxio.client.file.BaseFileSystem.lambda$exists$4(BaseFileSystem.java:210) at alluxio.client.file.BaseFileSystem.rpc(BaseFileSystem.java:623) ... 
14 more Caused by: io.grpc.StatusRuntimeException: UNKNOWN at io.grpc.stub.ClientCalls.toStatusRuntimeException(ClientCalls.java:262) at io.grpc.stub.ClientCalls.getUnchecked(ClientCalls.java:243) at io.grpc.stub.ClientCalls.blockingUnaryCall(ClientCalls.java:156) at alluxio.grpc.FileSystemMasterClientServiceGrpc$FileSystemMasterClientServiceBlockingStub.exists(FileSystemMasterClientServiceGrpc.java:2018) at alluxio.client.file.RetryHandlingFileSystemMasterClient.lambda$exists$6(RetryHandlingFileSystemMasterClient.java:192) at alluxio.AbstractClient.retryRPCInternal(AbstractClient.java:484) ... 19 more ``` pr-link: Alluxio/alluxio#17141 change-id: cid-e1b5d218d52120bbf38b4277bbfa0ddd12af1e10 --- .../proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index 19c6a07acb55..d10f9beb1aee 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -37,6 +37,7 @@ import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.ServerUserState; import alluxio.util.SecurityUtils; +import alluxio.util.ThreadUtils; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.annotations.VisibleForTesting; @@ -129,7 +130,11 @@ public static Response call(String resource, S3RestUtils.RestCallable cal XmlMapper mapper = new XmlMapper(); return Response.ok(mapper.writeValueAsString(result)).build(); } catch (Exception e) { - LOG.warn("Error invoking REST endpoint for {}:\n{}", resource, e.getMessage()); + String errOutputMsg = e.getMessage(); + if (StringUtils.isEmpty(errOutputMsg)) { + errOutputMsg = ThreadUtils.formatStackTrace(e); + } + LOG.warn("Error invoking REST endpoint for {}:\n{}", resource, errOutputMsg); return 
S3ErrorResponse.createErrorResponse(e, resource); } } From 79a7b4dc2e65cae22dec5584ccbbd9fd0df3ef8f Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Wed, 5 Apr 2023 23:28:14 -0700 Subject: [PATCH 225/334] Fix double arg parsing in runTests both https://github.com/Alluxio/alluxio/pull/17170/files#diff-c80fd5152cf94b0cc483c26efb94f3946af189626fc4f10c0232bf968432078e and https://github.com/Alluxio/alluxio/pull/17068/files introduced the same change but in different lines so the merge was clean, but resulted in parsing args twice caught by running `bin/alluxio runTests --directory /path/to/dir` with error msg: ``` error: Exception in thread "main" com.beust.jcommander.ParameterException: Can only specify option --directory once. at com.beust.jcommander.ParameterDescription.addValue(ParameterDescription.java:240) at com.beust.jcommander.JCommander.processFixedArity(JCommander.java:913) at com.beust.jcommander.JCommander.processFixedArity(JCommander.java:894) at com.beust.jcommander.JCommander.parseValues(JCommander.java:724) at com.beust.jcommander.JCommander.parse(JCommander.java:356) at com.beust.jcommander.JCommander.parse(JCommander.java:335) at alluxio.cli.TestRunner.main(TestRunner.java:110) ``` pr-link: Alluxio/alluxio#17199 change-id: cid-41ef7b12702f10c1e8b4cc729f99acc9e1b33753 --- shell/src/main/java/alluxio/cli/TestRunner.java | 1 - 1 file changed, 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/TestRunner.java b/shell/src/main/java/alluxio/cli/TestRunner.java index 406e99763149..7dfd74ae61c7 100644 --- a/shell/src/main/java/alluxio/cli/TestRunner.java +++ b/shell/src/main/java/alluxio/cli/TestRunner.java @@ -105,7 +105,6 @@ private TestRunner() {} // prevent instantiation public static void main(String[] args) throws Exception { TestRunner runner = new TestRunner(); JCommander jCommander = new JCommander(runner); - jCommander.parse(args); jCommander.setProgramName("TestRunner"); jCommander.parse(args); if (runner.mHelp) { From 
9a4e154e7293667572fec25c63136462fb58a345 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Mon, 10 Apr 2023 11:19:39 +0800 Subject: [PATCH 226/334] Show master versions in fsadmin report command ./alluxio fsadmin report summary ![image](https://user-images.githubusercontent.com/14806853/230587552-b44a3aea-fb69-49d8-bd61-6b21950a137e.png) edit: updated screenshot pr-link: Alluxio/alluxio#17177 change-id: cid-d8075ccbbcdaed6c4dd488428d20c59718e125c3 --- .../meta/MetaMasterClientServiceHandler.java | 32 ++++++++++++++++++ .../src/main/proto/grpc/meta_master.proto | 8 +++++ core/transport/src/main/proto/proto.lock | 30 +++++++++++++++++ .../cli/fsadmin/report/SummaryCommand.java | 14 +++++++- .../fsadmin/report/SummaryCommandTest.java | 33 +++++++++++++++++++ 5 files changed, 116 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java index cb33f8391d04..2d76019d89d4 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java @@ -26,7 +26,9 @@ import alluxio.grpc.GetMasterInfoPResponse; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; +import alluxio.grpc.MasterVersion; import alluxio.grpc.MetaMasterClientServiceGrpc; +import alluxio.grpc.NetAddress; import alluxio.master.StateLockOptions; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.wire.Address; @@ -139,6 +141,36 @@ public void getMasterInfo(GetMasterInfoPOptions options, masterInfo.setRaftJournal(mMetaMaster.getMasterContext().getJournalSystem() instanceof RaftJournalSystem); break; + case MASTER_VERSION: + masterInfo.addMasterVersions( + MasterVersion.newBuilder() + .setAddresses(NetAddress.newBuilder().setHost( + mMetaMaster.getRpcAddress().getHostName()) + 
.setRpcPort(mMetaMaster.getRpcAddress().getPort()).build()) + .setVersion(RuntimeConstants.VERSION) + .setState("PRIMARY") + .build() + ); + List standbyMasterVersions = + Arrays.stream(mMetaMaster.getStandbyMasterInfos()) + .map(it -> MasterVersion.newBuilder() + .setVersion(it.getVersion()) + .setAddresses(it.getAddress().toProto()) + .setState("STANDBY") + .build()) + .collect(Collectors.toList()); + + masterInfo.addAllMasterVersions(standbyMasterVersions); + List lostMasterVersions = + Arrays.stream(mMetaMaster.getLostMasterInfos()) + .map(it -> MasterVersion.newBuilder() + .setVersion(it.getVersion()) + .setAddresses(it.getAddress().toProto()) + .setState("LOST") + .build()) + .collect(Collectors.toList()); + masterInfo.addAllMasterVersions(lostMasterVersions); + break; default: LOG.warn("Unrecognized meta master info field: " + field); } diff --git a/core/transport/src/main/proto/grpc/meta_master.proto b/core/transport/src/main/proto/grpc/meta_master.proto index 5ec40ce402bd..6790b05c3144 100644 --- a/core/transport/src/main/proto/grpc/meta_master.proto +++ b/core/transport/src/main/proto/grpc/meta_master.proto @@ -77,6 +77,13 @@ message MasterInfo { optional string clusterId = 11; optional bool raftJournal = 12; repeated string raftAddress = 13; + repeated MasterVersion masterVersions = 14; +} + +message MasterVersion { + optional grpc.NetAddress addresses = 1; + optional string version = 2; + optional string state = 3; } enum MasterInfoField { @@ -93,6 +100,7 @@ enum MasterInfoField { CLUSTER_ID = 10; RAFT_JOURNAL = 11; RAFT_ADDRESSES = 12; + MASTER_VERSION = 13; } message GetMasterInfoPOptions { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 29ae6ba37caf..fa24040577ba 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -5746,6 +5746,10 @@ { "name": "RAFT_ADDRESSES", "integer": 12 + }, + { + "name": "MASTER_VERSION", + "integer": 13 } ] }, @@ -6012,6 
+6016,32 @@ "name": "raftAddress", "type": "string", "is_repeated": true + }, + { + "id": 14, + "name": "masterVersions", + "type": "MasterVersion", + "is_repeated": true + } + ] + }, + { + "name": "MasterVersion", + "fields": [ + { + "id": 1, + "name": "addresses", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "version", + "type": "string" + }, + { + "id": 3, + "name": "state", + "type": "string" } ] }, diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/SummaryCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/SummaryCommand.java index 386803bde1eb..7cf44993e8a7 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/SummaryCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/SummaryCommand.java @@ -16,6 +16,8 @@ import alluxio.client.meta.MetaMasterClient; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; +import alluxio.grpc.MasterVersion; +import alluxio.grpc.NetAddress; import alluxio.util.CommonUtils; import alluxio.util.FormatUtils; import alluxio.wire.BlockMasterInfo; @@ -82,7 +84,8 @@ private void printMetaMasterInfo() throws IOException { MasterInfoField.RPC_PORT, MasterInfoField.START_TIME_MS, MasterInfoField.UP_TIME_MS, MasterInfoField.VERSION, MasterInfoField.SAFE_MODE, MasterInfoField.ZOOKEEPER_ADDRESSES, - MasterInfoField.RAFT_JOURNAL, MasterInfoField.RAFT_ADDRESSES)); + MasterInfoField.RAFT_JOURNAL, MasterInfoField.RAFT_ADDRESSES, + MasterInfoField.MASTER_VERSION)); MasterInfo masterInfo = mMetaMasterClient.getMasterInfo(masterInfoFilter); print("Master Address: " + masterInfo.getLeaderMasterAddress()); @@ -118,6 +121,15 @@ private void printMetaMasterInfo() throws IOException { } else { print("Raft-based Journal: false"); } + String formatString = "%-32s %-8s %-32s"; + print(String.format(formatString, "Master Address", "State", "Version")); + for (MasterVersion masterVersion: masterInfo.getMasterVersionsList()) { + NetAddress address = masterVersion.getAddresses(); + 
print(String.format(formatString, + address.getHost() + ":" + address.getRpcPort(), + masterVersion.getState(), + masterVersion.getVersion())); + } } /** diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java index 0fe10e4bdf5a..e500e259bdd9 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java @@ -16,12 +16,15 @@ import static org.mockito.Mockito.when; import alluxio.Constants; +import alluxio.RuntimeConstants; import alluxio.client.block.BlockMasterClient; import alluxio.client.meta.MetaMasterClient; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.grpc.MasterInfo; +import alluxio.grpc.MasterVersion; +import alluxio.grpc.NetAddress; import alluxio.util.CommonUtils; import alluxio.wire.BlockMasterInfo; @@ -93,19 +96,45 @@ public void prepareBaseDependencies() throws IOException { } void prepareZKHADependencies() throws IOException { + MasterVersion primaryVersion = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Primary").setAddresses( + NetAddress.newBuilder().setHost("hostname1").setRpcPort(10000).build() + ).build(); + MasterVersion standby1Version = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Standby").setAddresses( + NetAddress.newBuilder().setHost("hostname2").setRpcPort(10001).build() + ).build(); + MasterVersion standby2Version = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Standby").setAddresses( + NetAddress.newBuilder().setHost("hostname3").setRpcPort(10002).build() + ).build(); mMasterInfo = MasterInfo.newBuilder(mMasterInfo) .addAllZookeeperAddresses(Arrays.asList("[zookeeper_hostname1]:2181", "[zookeeper_hostname2]:2181", "[zookeeper_hostname3]:2181")) + 
.addAllMasterVersions(Arrays.asList(primaryVersion, standby1Version, standby2Version)) .setRaftJournal(false) .build(); when(mMetaMasterClient.getMasterInfo(any())).thenReturn(mMasterInfo); } void prepareRaftHaDependencies() throws IOException { + MasterVersion primaryVersion = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Primary").setAddresses( + NetAddress.newBuilder().setHost("hostname1").setRpcPort(10000).build() + ).build(); + MasterVersion standby1Version = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Standby").setAddresses( + NetAddress.newBuilder().setHost("hostname2").setRpcPort(10001).build() + ).build(); + MasterVersion standby2Version = MasterVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION).setState("Standby").setAddresses( + NetAddress.newBuilder().setHost("hostname3").setRpcPort(10002).build() + ).build(); mMasterInfo = MasterInfo.newBuilder(mMasterInfo) .setRaftJournal(true) .addAllRaftAddress(Arrays.asList("[raftJournal_hostname1]:19200", "[raftJournal_hostname2]:19200", "[raftJournal_hostname3]:19200")) + .addAllMasterVersions(Arrays.asList(primaryVersion, standby1Version, standby2Version)) .build(); when(mMetaMasterClient.getMasterInfo(any())).thenReturn(mMasterInfo); } @@ -165,6 +194,10 @@ private void checkIfOutputValid(String dateFormatPattern, List " Safe Mode: false")); expectedOutput.addAll(HAPattern); expectedOutput.addAll(new ArrayList<>(Arrays.asList( + " Master Address State Version ", + " hostname1:10000 Primary 2.10.0-SNAPSHOT ", + " hostname2:10001 Standby 2.10.0-SNAPSHOT ", + " hostname3:10002 Standby 2.10.0-SNAPSHOT ", " Live Workers: 12", " Lost Workers: 4", " Total Capacity: 1309.92KB", From 01bfecc16d384ece3ad920edd307112520d29219 Mon Sep 17 00:00:00 2001 From: lucyge2022 <111789461+lucyge2022@users.noreply.github.com> Date: Mon, 10 Apr 2023 14:26:19 -0700 Subject: [PATCH 227/334] Fix CopyObject writetype and unclosed outstream in InitiateMPUpload ### 
What changes are proposed in this pull request? 1. when s3 write type is CACHE_THRU, initiateMultipartUpload creates MultipartMetaFile without closing the outstream, causing leak in BlockWorkerClient resource. 2. createFilePOption in CopyObject didn't set any write type (which should respect alluxio.proxy.s3.writetype), causing all objects copied are in the MUST_CACHE write type. ### Why are the changes needed? To fix the above 2 problems. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#17164 change-id: cid-968c2381d8cbcb6152fe8d4af2272cb201776b98 --- .../java/alluxio/proxy/s3/S3ObjectTask.java | 36 +++++-------- .../proxy/s3/S3RestServiceHandler.java | 3 +- .../client/rest/S3ClientRestApiTest.java | 53 ++++++++++++++++++- 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 165f6fe75fb3..27d77684b02e 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -448,7 +448,6 @@ public Response continueTask() { if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { createDirectory(objectPath, userFs, auditContext); } - AlluxioURI objectUri = new AlluxioURI(objectPath); // Populate the xattr Map with the metadata tags if provided Map xattrMap = new HashMap<>(); @@ -459,19 +458,6 @@ public Response continueTask() { final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); S3RestUtils.populateContentTypeInXAttr(xattrMap, contentTypeHeader); - CreateFilePOptions filePOptions = - CreateFilePOptions.newBuilder() - .setRecursive(true) - .setMode(PMode.newBuilder() - .setOwnerBits(Bits.ALL) - .setGroupBits(Bits.ALL) - .setOtherBits(Bits.NONE).build()) - .setWriteType(S3RestUtils.getS3WriteType()) - .putAllXattr(xattrMap) - 
.setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) - .setOverwrite(true) - .build(); - try { copySource = URLDecoder.decode(copySource, "UTF-8"); } catch (UnsupportedEncodingException ex) { @@ -483,15 +469,19 @@ public Response continueTask() { .setMode(PMode.newBuilder() .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) - .setOtherBits(Bits.NONE).build()); + .setOtherBits(Bits.NONE) + .build()) + .setWriteType(S3RestUtils.getS3WriteType()) + .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) + .setOverwrite(true); // Handle metadata directive final String metadataDirective = mHandler.getHeader( S3Constants.S3_METADATA_DIRECTIVE_HEADER); if (StringUtils.equals(metadataDirective, S3Constants.Directive.REPLACE.name()) - && filePOptions.getXattrMap().containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { + && xattrMap.containsKey(S3Constants.CONTENT_TYPE_XATTR_KEY)) { copyFilePOptionsBuilder.putXattr(S3Constants.CONTENT_TYPE_XATTR_KEY, - filePOptions.getXattrMap().get(S3Constants.CONTENT_TYPE_XATTR_KEY)); + xattrMap.get(S3Constants.CONTENT_TYPE_XATTR_KEY)); } else { // defaults to COPY try { status = userFs.getStatus(new AlluxioURI(copySource)); @@ -510,9 +500,9 @@ public Response continueTask() { final String taggingDirective = mHandler.getHeader( S3Constants.S3_TAGGING_DIRECTIVE_HEADER); if (StringUtils.equals(taggingDirective, S3Constants.Directive.REPLACE.name()) - && filePOptions.getXattrMap().containsKey(S3Constants.TAGGING_XATTR_KEY)) { + && xattrMap.containsKey(S3Constants.TAGGING_XATTR_KEY)) { copyFilePOptionsBuilder.putXattr(S3Constants.TAGGING_XATTR_KEY, - filePOptions.getXattrMap().get(S3Constants.TAGGING_XATTR_KEY)); + xattrMap.get(S3Constants.TAGGING_XATTR_KEY)); } else { // defaults to COPY try { if (status == null) { @@ -712,7 +702,6 @@ public Response continueTask() { if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { return createDirectory(objectPath, userFs, auditContext); } - AlluxioURI objectUri = new AlluxioURI(objectPath); // Populate the xattr 
Map with the metadata tags if provided Map xattrMap = new HashMap<>(); @@ -802,6 +791,7 @@ public Response continueTask() { .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) .setOverwrite(true); String entityTag = copyObject(userFs, auditContext, objectPath, copySource, copyFilePOptionsBuilder.build()); @@ -900,7 +890,7 @@ public Response continueTask() { ByteString.copyFrom(mHandler.getObject(), S3Constants.XATTR_STR_CHARSET)); xattrMap.put(S3Constants.UPLOADS_FILE_ID_XATTR_KEY, ByteString.copyFrom( Longs.toByteArray(userFs.getStatus(multipartTemporaryDir).getFileId()))); - mHandler.getMetaFS().createFile( + try (FileOutStream fos = mHandler.getMetaFS().createFile( new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId)), CreateFilePOptions.newBuilder() .setRecursive(true) @@ -912,7 +902,9 @@ public Response continueTask() { .putAllXattr(xattrMap) .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) .build() - ); + )) { + // Empty file creation, nothing to do. 
+ } SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() .setOwner(user) .build(); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index fb86ee921833..967528c4c01c 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -914,6 +914,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) + .setWriteType(S3RestUtils.getS3WriteType()) .setCheckS3BucketPath(true) .setOverwrite(true); // Handle metadata directive @@ -1089,7 +1090,7 @@ public Response initiateMultipartUpload( .putAllXattr(xattrMap) .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) .build() - ); + ).close(); SetAttributePOptions attrPOptions = SetAttributePOptions.newBuilder() .setOwner(user) .build(); diff --git a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java index 1ce63d6c5d4f..091ed6918a34 100644 --- a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java +++ b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java @@ -1315,6 +1315,17 @@ public void initiateMultipartUpload() throws Exception { String expectedResult = XML_MAPPER.writeValueAsString(expected); Assert.assertEquals(expectedResult, result); + + URIStatus mpMetaFileStatus = mFileSystem.getStatus( + new AlluxioURI(S3RestUtils.getMultipartMetaFilepathForUploadId(uploadId))); + Assert.assertTrue(mpMetaFileStatus.isCompleted()); + + AlluxioURI mpTempDirURI = new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject( + S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucketName), + objectName, uploadId)); + Assert.assertTrue(mFileSystem.exists(mpTempDirURI)); + URIStatus 
mpTempDirStatus = mFileSystem.getStatus(mpTempDirURI); + Assert.assertTrue(mpTempDirStatus.getFileInfo().isFolder()); } @Test @@ -1374,7 +1385,47 @@ public void uploadPartWithoutInitiation() throws Exception { Assert.fail("Upload part of an object without multipart upload initialization should fail"); } - // TODO(czhu): Add test for UploadPartCopy + @Test + public void testUploadPartCopy() throws Exception { + final String bucketName = "bucket"; + createBucketRestCall(bucketName); + + final String objectName = "src-object"; + String srcObjectKey = bucketName + AlluxioURI.SEPARATOR + objectName; + final byte[] srcObjectContent = CommonUtils.randomAlphaNumString(DATA_SIZE).getBytes(); + putObjectTest(bucketName, objectName, srcObjectContent, null, null); + + // UploadPartCopy object + String targetObjectName = "target-MP-object"; + String targetMPObjectKey = bucketName + AlluxioURI.SEPARATOR + targetObjectName; + String result = initiateMultipartUploadRestCall(targetMPObjectKey); + final String uploadId = XML_MAPPER.readValue(result, InitiateMultipartUploadResult.class) + .getUploadId(); + Map params = new HashMap<>(); + params.put("uploadId", uploadId); + params.put("partNumber", "1"); + + new TestCase(mHostname, mPort, mBaseUri, + targetMPObjectKey, + params, HttpMethod.PUT, + getDefaultOptionsWithAuth() + .addHeader(S3Constants.S3_COPY_SOURCE_HEADER, srcObjectKey)).runAndGetResponse(); + + List partList = new ArrayList<>(); + partList.add(new CompleteMultipartUploadRequest.Part("", 1)); + result = completeMultipartUploadRestCall(targetMPObjectKey, uploadId, + new CompleteMultipartUploadRequest(partList)); + + // Verify the object's content. 
+ byte[] downloadTargetMpObj = new byte[DATA_SIZE]; + MessageDigest md5 = MessageDigest.getInstance("MD5"); + try (FileInStream is = mFileSystem + .openFile(new AlluxioURI("/" + targetMPObjectKey))) { + is.read(downloadTargetMpObj, 0, DATA_SIZE); + Assert.assertTrue(is.available() <= 0); + } + Assert.assertArrayEquals(srcObjectContent, downloadTargetMpObj); + } @Test public void listParts() throws Exception { From 8cbcbcd6d3f3451f2bfeb326efadfd1372c9351e Mon Sep 17 00:00:00 2001 From: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> Date: Tue, 11 Apr 2023 16:22:11 -0600 Subject: [PATCH 228/334] Enhance embedded journal checkpointing significantly This PR aims to change the implementation of the embedded journal snapshotting mechanism. This includes taking a snapshot, uploading a snapshot from one master to another, and restoring from a snapshot. Previously, the snapshot would concatenate all snapshotting information into one compressed file. To restore from said snapshot, the master would have to read the snapshot file sequentially to restore its state. To upload the snapshot between masters, it would first request information through Apache Ratis, and then use custom grpc endpoints to upload the snapshot from one master to another. The new implementation allocates one path per class implementing the `Checkpointed` interface. Each class can then decide its own snapshot format. Most will write information into one file. The Rocks inode and block stores use theirs as directories to dump `.sst` files in. Each file in the snapshot directory has an associated `.md5` file to protect against file corruption. To upload the snapshot, the master now has a grpc server enabled at all times. New endpoints have been developed to retrieve information about snapshots and to download snapshots. The snapshot is compressed as it is being sent over the network using custom input and output streams. 
The focus is to deliver a reliable snapshot upload mechanism, and a quick recovery from snapshots, so that failovers can be more reliable. Preliminary results show a 95+% improvement in snapshot recovery time. pr-link: Alluxio/alluxio#16998 change-id: cid-0db7ec0d037a4476eeb2433059fff58bc970b652 --- core/common/pom.xml | 4 + .../src/main/java/alluxio/AbstractClient.java | 1 + .../alluxio/conf/InstancedConfiguration.java | 5 +- .../main/java/alluxio/conf/PropertyKey.java | 61 +- .../main/java/alluxio/metrics/MetricKey.java | 103 ++- .../java/alluxio/metrics/MetricsSystem.java | 11 + .../util/compression/DirectoryMarshaller.java | 72 ++ .../util/compression/GzipMarshaller.java | 49 ++ .../compression/NoCompressionMarshaller.java | 85 +++ .../util/compression}/ParallelZipUtils.java | 2 +- .../util/compression/TarGzMarshaller.java | 38 ++ .../alluxio/util/compression}/TarUtils.java | 24 +- core/server/common/pom.xml | 5 + .../master/journal/DelegatingJournaled.java | 15 + .../master/journal/JournaledGroup.java | 19 + .../alluxio/master/journal/NoopJournaled.java | 15 + .../master/journal/SingleEntryJournaled.java | 53 ++ .../checkpoint/CheckpointInputStream.java | 2 +- .../journal/checkpoint/CheckpointName.java | 2 + .../journal/checkpoint/Checkpointed.java | 67 ++ .../OptimizedCheckpointInputStream.java | 38 ++ .../OptimizedCheckpointOutputStream.java | 65 ++ .../journal/raft/JournalStateMachine.java | 330 ++++------ .../raft/RaftJournalServiceClient.java | 62 +- .../raft/RaftJournalServiceHandler.java | 172 ++++- .../journal/raft/RaftJournalSystem.java | 15 +- .../journal/raft/RaftSnapshotManager.java | 313 +++++++++ .../raft/SnapshotDirStateMachineStorage.java | 162 +++++ .../journal/raft/SnapshotDownloader.java | 212 ------ .../journal/raft/SnapshotIdJournaled.java | 30 + .../raft/SnapshotReplicationManager.java | 620 ------------------ .../master/journal/raft/SnapshotUploader.java | 178 ----- .../master/journal/JournalUtilsTest.java | 193 ++++++ 
.../checkpoint/CheckpointStreamTest.java | 79 +++ .../journal/raft/RaftSnapshotManagerTest.java | 274 ++++++++ .../SnapshotDirStateMachineStorageTest.java | 167 +++++ .../compression/DirectoryMarshallerTest.java | 101 +++ .../util/{ => compression}/FileUtil.java | 2 +- .../ParallelZipUtilsTest.java | 2 +- .../util/{ => compression}/TarUtilsTest.java | 2 +- .../alluxio/master/AlluxioMasterProcess.java | 2 +- .../java/alluxio/master/MasterProcess.java | 10 +- .../master/backup/BackupWorkerRole.java | 9 + .../master/block/DefaultBlockMaster.java | 38 +- ...efaultBlockMasterContainerIdJournaled.java | 25 + .../file/meta/InodeTreePersistentState.java | 22 + .../journal/tool/RaftJournalDumper.java | 42 +- .../master/meta/DefaultMetaMaster.java | 6 + .../metastore/caching/CachingInodeStore.java | 36 + .../metastore/rocks/RocksBlockMetaStore.java | 21 +- .../metastore/rocks/RocksCheckpointed.java | 80 +++ .../metastore/rocks/RocksInodeStore.java | 22 +- .../master/metastore/rocks/RocksStore.java | 33 +- .../master/service/rpc/RpcServerService.java | 20 +- .../rpc/RpcServerStandbyGrpcService.java | 10 + .../master/AlluxioMasterProcessTest.java | 10 +- .../BlockMasterWorkerServiceHandlerTest.java | 5 +- .../DefaultBlockMasterCheckpointTest.java | 125 ++++ .../file/meta/CheckpointedIdHashSetTest.java | 63 ++ .../replication/ReplicationCheckerTest.java | 4 +- .../raft/RaftJournalSystemMetricsTest.java | 6 +- .../raft/SnapshotReplicationManagerTest.java | 484 -------------- .../AlluxioMasterRestServiceHandlerTest.java | 5 +- .../metastore/InodeStoreCheckpointTest.java | 129 ++++ .../service/rpc/RpcServerServiceTest.java | 8 + .../src/main/proto/grpc/common.proto | 2 + .../src/main/proto/grpc/raft_journal.proto | 15 + core/transport/src/main/proto/proto.lock | 23 + .../java/alluxio/snapshot/SnapshotBench.java | 141 ++++ .../alluxio/master/LocalAlluxioMaster.java | 2 +- .../alluxio/client/cli/JournalToolTest.java | 9 +- ...MasterDeleteLostWorkerIntegrationTest.java | 5 +- 
.../journal/MultiProcessCheckpointTest.java | 2 +- ...dJournalIntegrationTestFaultTolerance.java | 39 +- .../WorkerAllMasterRegistrationTest.java | 1 + 75 files changed, 3234 insertions(+), 1870 deletions(-) create mode 100644 core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java create mode 100644 core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java create mode 100644 core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java rename core/{server/common/src/main/java/alluxio/util => common/src/main/java/alluxio/util/compression}/ParallelZipUtils.java (99%) create mode 100644 core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java rename core/{server/common/src/main/java/alluxio/util => common/src/main/java/alluxio/util/compression}/TarUtils.java (79%) create mode 100644 core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java create mode 100644 core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java create mode 100644 core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java create mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java create mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java delete mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java create mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java delete mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java delete mode 100644 core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java create mode 100644 core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java create mode 100644 
core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java create mode 100644 core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java create mode 100644 core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java rename core/server/common/src/test/java/alluxio/util/{ => compression}/FileUtil.java (98%) rename core/server/common/src/test/java/alluxio/util/{ => compression}/ParallelZipUtilsTest.java (99%) rename core/server/common/src/test/java/alluxio/util/{ => compression}/TarUtilsTest.java (99%) create mode 100644 core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java create mode 100644 core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java create mode 100644 core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java delete mode 100644 core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java create mode 100644 microbench/src/main/java/alluxio/snapshot/SnapshotBench.java diff --git a/core/common/pom.xml b/core/common/pom.xml index 27de5aa8f6db..24ef667caa2e 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -103,6 +103,10 @@ org.apache.commons commons-lang3 + + org.apache.commons + commons-compress + org.apache.curator curator-client diff --git a/core/common/src/main/java/alluxio/AbstractClient.java b/core/common/src/main/java/alluxio/AbstractClient.java index 7f340ba6a87a..efe5f0fcde6f 100644 --- a/core/common/src/main/java/alluxio/AbstractClient.java +++ b/core/common/src/main/java/alluxio/AbstractClient.java @@ -487,6 +487,7 @@ private synchronized V retryRPCInternal(RetryPolicy retryPolicy, 
RpcCallable if (se.getStatusCode() == Status.Code.UNAVAILABLE || se.getStatusCode() == Status.Code.CANCELLED || se.getStatusCode() == Status.Code.UNAUTHENTICATED + || se.getStatusCode() == Status.Code.UNIMPLEMENTED // for standby grpc enabled || e.getCause() instanceof UnresolvedAddressException) { ex = se; } else { diff --git a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java index 6f62b66a479f..bed376293f4e 100644 --- a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java +++ b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java @@ -602,11 +602,12 @@ void checkTieredStorage() { * @throws IllegalStateException if invalid checkpoint zip configuration parameters are found */ private void checkCheckpointZipConfig() { - int compression = getInt(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL); + int compression = getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); if (compression < -1 || compression > 9) { throw new IllegalStateException(String.format("Zip compression level for property key %s" + " must be between -1 and 9 inclusive", - PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL.getName())); + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL.getName())); } } diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index de625b63dc94..f05f12f37699 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -52,6 +52,7 @@ import alluxio.security.authentication.AuthType; import alluxio.util.FormatUtils; import alluxio.util.OSUtils; +import alluxio.util.compression.DirectoryMarshaller; import alluxio.util.io.PathUtils; import alluxio.worker.block.BlockStoreType; import alluxio.worker.block.management.BackoffStrategy; @@ 
-67,6 +68,7 @@ import com.google.common.collect.ImmutableList; import com.sun.management.OperatingSystemMXBean; import io.netty.util.ResourceLeakDetector; +import org.rocksdb.CompressionType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -2398,6 +2400,28 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE = + enumBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE, + DirectoryMarshaller.Type.class) + .setDefaultValue(DirectoryMarshaller.Type.NO_COMPRESSION) + .setDescription("The type of compression to use when transferring a snapshot from one " + + "master to another. Options are NO_COMPRESSION, GZIP, TAR_GZIP") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL = + intBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL) + .setAlias(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL) + .setDefaultValue(1) + .setDescription("The zip compression level of sending a snapshot from one master to " + + "another. Only applicable when " + + "alluxio.master.embedded.journal.snapshot.replication.compression.type is not " + + "NO_COMPRESSION. The zip format defines ten levels of compression, ranging from 0 " + + "(no compression, but very fast) to 9 (best compression, but slow). 
" + + "Or -1 for the system default compression level.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE = dataSizeBuilder(Name.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE) .setDefaultValue("4MB") @@ -2581,13 +2605,14 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); - public static final PropertyKey MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL = - intBuilder(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL) - .setDefaultValue(1) - .setDescription("The zip compression level of checkpointing rocksdb, the zip" - + " format defines ten levels of compression, ranging from 0" - + " (no compression, but very fast) to 9 (best compression, but slow)." - + " Or -1 for the system default compression level.") + public static final PropertyKey MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE = + enumBuilder(Name.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, CompressionType.class) + // default value informed by https://github.com/facebook/rocksdb/wiki/Compression + .setDefaultValue(CompressionType.LZ4_COMPRESSION) + .setDescription("The compression algorithm that RocksDB uses internally. 
One of " + + "{NO_COMPRESSION SNAPPY_COMPRESSION ZLIB_COMPRESSION BZLIB2_COMPRESSION " + + "LZ4_COMPRESSION LZ4HC_COMPRESSION XPRESS_COMPRESSION ZSTD_COMPRESSION " + + "DISABLE_COMPRESSION_OPTION}") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); @@ -3095,14 +3120,6 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); - public static final PropertyKey MASTER_JOURNAL_LOG_CONCURRENCY_MAX = - intBuilder(Name.MASTER_JOURNAL_LOG_CONCURRENCY_MAX) - .setDefaultValue(256) - .setDescription("Max concurrency for notifyTermIndexUpdated method, be sure it's " - + "enough") - .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) - .setScope(Scope.MASTER) - .build(); public static final PropertyKey MASTER_JOURNAL_REQUEST_DATA_TIMEOUT = durationBuilder(Name.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT) .setDefaultValue(20000) @@ -3112,7 +3129,7 @@ public String toString() { .build(); public static final PropertyKey MASTER_JOURNAL_REQUEST_INFO_TIMEOUT = durationBuilder(Name.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT) - .setDefaultValue(20000) + .setDefaultValue(10_000) .setDescription("Time to wait for follower to respond to request to get information" + " about its latest snapshot") .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) @@ -3883,8 +3900,10 @@ public String toString() { .build(); public static final PropertyKey STANDBY_MASTER_GRPC_ENABLED = booleanBuilder(Name.STANDBY_MASTER_GRPC_ENABLED) - .setDefaultValue(false) - .setDescription("Whether a standby master runs a grpc server") + .setDefaultValue(true) + .setIsHidden(true) + .setDescription("Whether a standby master runs a grpc server. 
WARNING: disabling this " + + "will prevent master snapshotting from working correctly.") .setScope(Scope.ALL) .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .build(); @@ -8007,6 +8026,10 @@ public static final class Name { "alluxio.master.embedded.journal.write.timeout"; public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE = "alluxio.master.embedded.journal.snapshot.replication.chunk.size"; + public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE = + "alluxio.master.embedded.journal.snapshot.replication.compression.type"; + public static final String MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL = + "alluxio.master.embedded.journal.snapshot.replication.compression.level"; public static final String MASTER_EMBEDDED_JOURNAL_RAFT_CLIENT_REQUEST_TIMEOUT = "alluxio.master.embedded.journal.raft.client.request.timeout"; public static final String MASTER_EMBEDDED_JOURNAL_RAFT_CLIENT_REQUEST_INTERVAL = @@ -8058,6 +8081,8 @@ public static final class Name { "alluxio.master.metastore.dir.block"; public static final String MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL = "alluxio.master.metastore.rocks.checkpoint.compression.level"; + public static final String MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE = + "alluxio.master.metastore.rocks.checkpoint.compression.type"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP = "alluxio.master.metastore.rocks.parallel.backup"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_THREADS = diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index abd286028bb7..e82b2972a788 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -870,6 +870,18 @@ public static String getSyncMetricName(long mountId) { .build(); // Journal metrics + public static final 
MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotDurationMs") + .setDescription("Describes the amount of time taken to generate the last local journal " + + "snapshots on this master. Only valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_ENTRIES_COUNT = + new Builder("Master.EmbeddedJournalLastSnapshotEntriesCount") + .setDescription("Describes the number of entries in the last local journal " + + "snapshots on this master. Only valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER = new Builder("Master.EmbeddedJournalSnapshotGenerateTimer") .setDescription("Describes the amount of time taken to generate local journal snapshots" @@ -880,11 +892,84 @@ public static String getSyncMetricName(long mountId) { public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER = new Builder("Master.EmbeddedJournalSnapshotDownloadGenerate") .setDescription("Describes the amount of time taken to download journal snapshots from " - + "other masters in the cluster. Only valid when using the embedded journal. Use " - + "this metric to determine if there are potential communication bottlenecks " - + "between Alluxio masters.") + + "other masters in the cluster. Only valid when using the embedded journal. Long " + + "running average.") + .setMetricType(MetricType.TIMER) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadDurationMs") + .setDescription("Describes the amount of time taken to download journal snapshots from " + + "other masters in the cluster the previous time the download occurred. 
Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotDownloadHistogram") + .setDescription("Describes the size of the snapshot downloaded from another master in " + + "the cluster. Only valid when using the embedded journal. Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadSize") + .setDescription("Describes the size of the snapshot downloaded from " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_DISK_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotDownloadDiskHistogram") + .setDescription("Describes the size on disk of the snapshot downloaded from another " + + "master in the cluster. Only valid when using the embedded journal. " + + "Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DISK_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotDownloadDiskSize") + .setDescription("Describes the size on disk of the snapshot downloaded from " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotUploadHistogram") + .setDescription("Describes the size of the snapshot uploaded to another master in " + + "the cluster. Only valid when using the embedded journal. 
Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotUploadSize") + .setDescription("Describes the size of the snapshot uploaded to " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_DISK_HISTOGRAM = + new Builder("Master.EmbeddedJournalSnapshotUploadDiskHistogram") + .setDescription("Describes the size on disk of the snapshot uploaded to another master " + + "in the cluster. Only valid when using the embedded journal. Long running average.") + .setMetricType(MetricType.HISTOGRAM) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DISK_SIZE = + new Builder("Master.EmbeddedJournalLastSnapshotUploadDiskSize") + .setDescription("Describes the size on disk of the snapshot uploaded to " + + "other masters in the cluster the previous time the download occurred. Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_TIMER = + new Builder("Master.EmbeddedJournalSnapshotUploadTimer") + .setDescription("Describes the amount of time taken to upload journal snapshots to " + + "another master in the cluster. Only valid when using the embedded journal. long " + + "running average") .setMetricType(MetricType.TIMER) .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotUploadDurationMs") + .setDescription("Describes the amount of time taken to upload journal snapshots to " + + "another master in the cluster the previous time the upload occurred. 
Only " + + "valid when using the embedded journal.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER = new Builder("Master.EmbeddedJournalSnapshotInstallTimer") .setDescription("Describes the amount of time taken to install a downloaded journal " @@ -902,6 +987,18 @@ public static String getSyncMetricName(long mountId) { + "snapshot file. Higher numbers may indicate a slow disk or CPU contention") .setMetricType(MetricType.TIMER) .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_DURATION_MS = + new Builder("Master.EmbeddedJournalLastSnapshotReplayDurationMs") + .setDescription("Represents the time the last restore from checkpoint operation took in" + + " milliseconds.") + .setMetricType(MetricType.GAUGE) + .build(); + public static final MetricKey MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_ENTRIES_COUNT = + new Builder("Master.EmbeddedJournalLastSnapshotReplayEntriesCount") + .setDescription("Represents the time the last restore from checkpoint operation took in" + + " milliseconds.") + .setMetricType(MetricType.GAUGE) + .build(); public static final MetricKey MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX = new Builder("Master.EmbeddedJournalSnapshotLastIndex") .setDescription("Represents the latest journal index that was recorded by this master " diff --git a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java index c74667b5b970..c43099df7a74 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java +++ b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java @@ -24,6 +24,7 @@ import com.codahale.metrics.CachedGauge; import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; @@ -642,6 +643,16 @@ 
public static Timer uniformTimer(String name) { }); } + /** + * Get or add a histogram with the given name. + * + * @param name the name of the metric + * @return a histogram object with the qualified metric name + */ + public static Histogram histogram(String name) { + return METRIC_REGISTRY.histogram(getMetricName(name)); + } + /** + * Registers a gauge if it has not been registered. + * diff --git a/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java b/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java new file mode 100644 index 000000000000..d03bc19d5a8c --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/DirectoryMarshaller.java @@ -0,0 +1,72 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Interface for a directory marshaller to follow. + */ +public interface DirectoryMarshaller { + /** + * Writes the contents of path to outputStream such that it can be read by + * {@link #read(Path, InputStream)}. 
+ * @param path the directory to marshal + * @param outputStream the output stream that the marshalled information is written to + * @return the number of bytes read from path + */ + long write(Path path, OutputStream outputStream) throws IOException, InterruptedException; + + /** + * Reads the content from the inputStream and writes it to the specified path. + * @param path the output path + * @param inputStream the stream to read the data from + * @return the number of bytes written to path + */ + long read(Path path, InputStream inputStream) throws IOException; + + /** + * An enum to represent the different {@link DirectoryMarshaller} types. + */ + enum Type { + NO_COMPRESSION, + GZIP, + TAR_GZIP, + } + + /** + * Factory to access the DirectoryMarshaller. + */ + class Factory { + /** + * @return a {@link DirectoryMarshaller} + */ + public static DirectoryMarshaller create() { + Type compressionType = Configuration.getEnum( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_TYPE, Type.class); + switch (compressionType) { + case GZIP: + return new GzipMarshaller(); + case TAR_GZIP: + return new TarGzMarshaller(); + default: + return new NoCompressionMarshaller(); + } + } + } +} diff --git a/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java b/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java new file mode 100644 index 000000000000..a0693d9461a4 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/GzipMarshaller.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipParameters; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Applies a simple Gzip compression to the {@link NoCompressionMarshaller}. + */ +public class GzipMarshaller implements DirectoryMarshaller { + private final int mSnapshotCompressionLevel = Configuration.getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + private final NoCompressionMarshaller mMarshaller = new NoCompressionMarshaller(); + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + GzipParameters params = new GzipParameters(); + params.setCompressionLevel(mSnapshotCompressionLevel); + GzipCompressorOutputStream zipStream = new GzipCompressorOutputStream(outputStream, params); + long bytes = mMarshaller.write(path, zipStream); + zipStream.finish(); + return bytes; + } + + @Override + public long read(Path path, InputStream inputStream) throws IOException { + InputStream zipStream = new GzipCompressorInputStream(inputStream); + return mMarshaller.read(path, zipStream); + } +} diff --git a/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java b/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java new file mode 100644 index 000000000000..fcc7e4f735fc --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/NoCompressionMarshaller.java @@ -0,0 +1,85 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, 
version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Simple marshaller that applies no compression. + */ +public class NoCompressionMarshaller implements DirectoryMarshaller { + private static final char DIR_CHAR = 'd'; + private static final char FILE_CHAR = 'f'; + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + long totalBytesCopied = 0; + try (final Stream stream = Files.walk(path); + DataOutputStream dataOS = new DataOutputStream(outputStream)) { + for (Path subpath : stream.collect(Collectors.toList())) { + byte[] relativePath = path.relativize(subpath).toString().getBytes(); + dataOS.write(relativePath.length); + dataOS.write(relativePath); + if (subpath.toFile().isDirectory()) { + dataOS.writeChar(DIR_CHAR); + } else { + dataOS.writeChar(FILE_CHAR); + dataOS.writeLong(FileUtils.sizeOf(subpath.toFile())); + try (InputStream fileIn = new BufferedInputStream(Files.newInputStream(subpath))) { + totalBytesCopied += IOUtils.copyLarge(fileIn, dataOS); + } + } + } + } + return totalBytesCopied; + } + + @Override + 
public long read(Path path, InputStream inputStream) throws IOException { + path.toFile().mkdirs(); + long totalBytesRead = 0; + try (DataInputStream dataIS = new DataInputStream(inputStream)) { + int pathSize; + while ((pathSize = dataIS.read()) != -1) { + byte[] relativePath = new byte[pathSize]; + dataIS.read(relativePath); + File filePath = new File(path.toFile(), new String(relativePath)); + char c = dataIS.readChar(); + if (c == DIR_CHAR) { + filePath.mkdirs(); + } else { + filePath.getParentFile().mkdirs(); + long fileSize = dataIS.readLong(); + try (OutputStream fileOut = + new BufferedOutputStream(Files.newOutputStream(filePath.toPath()))) { + totalBytesRead += IOUtils.copyLarge(dataIS, fileOut, 0, fileSize); + } + } + } + } + return totalBytesRead; + } +} diff --git a/core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java b/core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java similarity index 99% rename from core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java rename to core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java index c01001e8b093..7258c783ef9b 100644 --- a/core/server/common/src/main/java/alluxio/util/ParallelZipUtils.java +++ b/core/common/src/main/java/alluxio/util/compression/ParallelZipUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import static java.util.stream.Collectors.toList; diff --git a/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java b/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java new file mode 100644 index 000000000000..59597c417134 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/compression/TarGzMarshaller.java @@ -0,0 +1,38 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; + +/** + * Marshall directory following the .tar.gz specification. + */ +public class TarGzMarshaller implements DirectoryMarshaller { + private final int mSnapshotCompressionLevel = Configuration.getInt( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); + + @Override + public long write(Path path, OutputStream outputStream) throws IOException, InterruptedException { + return TarUtils.writeTarGz(path, outputStream, mSnapshotCompressionLevel); + } + + @Override + public long read(Path path, InputStream inputStream) throws IOException { + return TarUtils.readTarGz(path, inputStream); + } +} diff --git a/core/server/common/src/main/java/alluxio/util/TarUtils.java b/core/common/src/main/java/alluxio/util/compression/TarUtils.java similarity index 79% rename from core/server/common/src/main/java/alluxio/util/TarUtils.java rename to core/common/src/main/java/alluxio/util/compression/TarUtils.java index 6c2f7146c264..e75e965bfa70 100644 --- a/core/server/common/src/main/java/alluxio/util/TarUtils.java +++ b/core/common/src/main/java/alluxio/util/compression/TarUtils.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.util; +package alluxio.util.compression; import static java.util.stream.Collectors.toList; @@ -21,8 +21,9 @@ import org.apache.commons.compress.compressors.gzip.GzipParameters; import org.apache.commons.io.IOUtils; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -42,8 +43,9 @@ public final class TarUtils { * @param compressionLevel the compression level to use (0 for no compression, 9 for the most * compression, or -1 for system default) * @param output the output stream to write the data to + * @return the number of bytes copied from the directory into the archive */ - public static void writeTarGz(Path dirPath, OutputStream output, int compressionLevel) + public static long writeTarGz(Path dirPath, OutputStream output, int compressionLevel) throws IOException, InterruptedException { GzipParameters params = new GzipParameters(); params.setCompressionLevel(compressionLevel); @@ -51,6 +53,7 @@ public static void writeTarGz(Path dirPath, OutputStream output, int compression TarArchiveOutputStream archiveStream = new TarArchiveOutputStream(zipStream); archiveStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX); archiveStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX); + long totalBytesCopied = 0; try (final Stream stream = Files.walk(dirPath)) { for (Path subPath : stream.collect(toList())) { if (Thread.interrupted()) { @@ -60,8 +63,8 @@ public static void writeTarGz(Path dirPath, OutputStream output, int compression TarArchiveEntry entry = new TarArchiveEntry(file, dirPath.relativize(subPath).toString()); archiveStream.putArchiveEntry(entry); if (file.isFile()) { - try (InputStream fileIn = Files.newInputStream(subPath)) { - IOUtils.copy(fileIn, archiveStream); + try (InputStream fileIn = new BufferedInputStream(Files.newInputStream(subPath))) { + 
totalBytesCopied += IOUtils.copyLarge(fileIn, archiveStream); } } archiveStream.closeArchiveEntry(); @@ -69,6 +72,7 @@ public static void writeTarGz(Path dirPath, OutputStream output, int compression } archiveStream.finish(); zipStream.finish(); + return totalBytesCopied; } /** @@ -76,22 +80,26 @@ public static void writeTarGz(Path dirPath, OutputStream output, int compression * * @param dirPath the path to write the archive to * @param input the input stream + * @return the number of bytes copied from the archive in the directory */ - public static void readTarGz(Path dirPath, InputStream input) throws IOException { + public static long readTarGz(Path dirPath, InputStream input) throws IOException { InputStream zipStream = new GzipCompressorInputStream(input); TarArchiveInputStream archiveStream = new TarArchiveInputStream(zipStream); TarArchiveEntry entry; + long totalBytesCopied = 0; while ((entry = (TarArchiveEntry) archiveStream.getNextEntry()) != null) { File outputFile = new File(dirPath.toFile(), entry.getName()); if (entry.isDirectory()) { outputFile.mkdirs(); } else { outputFile.getParentFile().mkdirs(); - try (FileOutputStream fileOut = new FileOutputStream(outputFile)) { - IOUtils.copy(archiveStream, fileOut); + try (OutputStream fileOut = + new BufferedOutputStream(Files.newOutputStream(outputFile.toPath()))) { + totalBytesCopied += IOUtils.copyLarge(archiveStream, fileOut); } } } + return totalBytesCopied; } private TarUtils() {} // Utils class diff --git a/core/server/common/pom.xml b/core/server/common/pom.xml index 438d656008fc..ec5b82504477 100644 --- a/core/server/common/pom.xml +++ b/core/server/common/pom.xml @@ -135,6 +135,11 @@ + + org.lz4 + lz4-java + 1.8.0 + diff --git a/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java index 2c5523fbff31..fdf278d8aa67 100644 --- 
a/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java +++ b/core/server/common/src/main/java/alluxio/master/journal/DelegatingJournaled.java @@ -16,8 +16,11 @@ import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.function.Supplier; /** @@ -45,11 +48,23 @@ default CheckpointName getCheckpointName() { return getDelegate().getCheckpointName(); } + @Override + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return getDelegate().writeToCheckpoint(directory, executorService); + } + @Override default void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { getDelegate().writeToCheckpoint(output); } + @Override + default CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return getDelegate().restoreFromCheckpoint(directory, executorService); + } + @Override default void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { getDelegate().restoreFromCheckpoint(input); diff --git a/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java b/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java index b90c0003a4cd..bc444dacee53 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java +++ b/core/server/common/src/main/java/alluxio/master/journal/JournaledGroup.java @@ -19,9 +19,12 @@ import com.google.common.collect.Lists; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Convenience class which groups together multiple Journaled components as a single Journaled 
@@ -70,11 +73,27 @@ public CheckpointName getCheckpointName() { return mCheckpointName; } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(mJournaled.stream() + .map(journaled -> journaled.writeToCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { JournalUtils.writeToCheckpoint(output, mJournaled); } + @Override + public CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(mJournaled.stream() + .map(journaled -> journaled.restoreFromCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { JournalUtils.restoreFromCheckpoint(input, mJournaled); diff --git a/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java index ebcf77b7c83a..410454c51aec 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java +++ b/core/server/common/src/main/java/alluxio/master/journal/NoopJournaled.java @@ -18,9 +18,12 @@ import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.Collections; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Interface providing default implementations which do nothing. 
@@ -40,6 +43,12 @@ default CheckpointName getCheckpointName() { return CheckpointName.NOOP; } + @Override + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.completedFuture(null); + } + @Override default void writeToCheckpoint(OutputStream output) throws IOException { // Just write a checkpoint type with no data. The stream constructor writes unbuffered to the @@ -47,6 +56,12 @@ default void writeToCheckpoint(OutputStream output) throws IOException { new CheckpointOutputStream(output, CheckpointType.JOURNAL_ENTRY); } + @Override + default CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.completedFuture(null); + } + @Override default void restoreFromCheckpoint(CheckpointInputStream input) { } diff --git a/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java new file mode 100644 index 000000000000..7e4c224ade9a --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/SingleEntryJournaled.java @@ -0,0 +1,53 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal; + +import alluxio.proto.journal.Journal; +import alluxio.resource.CloseableIterator; +import alluxio.util.CommonUtils; + +/** + * Journaled component responsible for journaling a single journal entry. 
+ */ +public abstract class SingleEntryJournaled implements Journaled { + + private Journal.JournalEntry mEntry = Journal.JournalEntry.getDefaultInstance(); + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable(CommonUtils.singleElementIterator(mEntry)); + } + + @Override + public boolean processJournalEntry(Journal.JournalEntry entry) { + if (!Journal.JournalEntry.getDefaultInstance().equals(mEntry)) { + LOG.warn("{} has already processed an entry", getCheckpointName()); + } + mEntry = entry; + return true; + } + + @Override + public void resetState() { + mEntry = Journal.JournalEntry.getDefaultInstance(); + } + + /** + * @return the entry stored by this object + */ + public Journal.JournalEntry getEntry() { + if (Journal.JournalEntry.getDefaultInstance().equals(mEntry)) { + LOG.warn("{} has not processed any entries", getCheckpointName()); + } + return mEntry; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java index 8c09303b7d0b..14eabc267c20 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointInputStream.java @@ -28,7 +28,7 @@ * * @see CheckpointOutputStream */ -public final class CheckpointInputStream extends DataInputStream { +public class CheckpointInputStream extends DataInputStream { private static final Logger LOG = LoggerFactory.getLogger(CheckpointInputStream.class); private final CheckpointType mType; diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java index 75615919331b..8870f7ec5d4f 100644 --- 
a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/CheckpointName.java @@ -18,6 +18,7 @@ public enum CheckpointName { ACTIVE_SYNC_MANAGER, BLOCK_MASTER, + BLOCK_MASTER_CONTAINER_ID, CACHING_INODE_STORE, CLUSTER_INFO, FILE_SYSTEM_MASTER, @@ -40,4 +41,5 @@ public enum CheckpointName { TO_BE_PERSISTED_FILE_IDS, TTL_BUCKET_LIST, SCHEDULER, + SNAPSHOT_ID, } diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java index 7ba2f5098433..d381587d5851 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/Checkpointed.java @@ -11,18 +11,59 @@ package alluxio.master.journal.checkpoint; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; + +import io.grpc.Status; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.util.MD5FileUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; import java.io.IOException; import java.io.OutputStream; +import java.security.MessageDigest; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; /** * Base class for Alluxio classes which can be written to and read from metadata checkpoints. */ public interface Checkpointed { + Logger LOG = LoggerFactory.getLogger(Checkpointed.class); /** * @return a name for this checkpointed class */ CheckpointName getCheckpointName(); + /** + * Writes a checkpoint to the specified directory asynchronously using the provided executor. 
+ * + * @param directory where the checkpoint will be written + * @param executorService to use when running tasks asynchronously + * @return a future that processes the computation + */ + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("taking {} snapshot started", getCheckpointName()); + File file = new File(directory, getCheckpointName().toString()); + try { + MessageDigest md5 = MD5Hash.getDigester(); + try (OutputStream outputStream = new OptimizedCheckpointOutputStream(file, md5)) { + writeToCheckpoint(outputStream); + } + MD5FileUtil.saveMD5File(file, new MD5Hash(md5.digest())); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("taking {} snapshot finished", getCheckpointName()); + }, executorService); + } + /** * Writes a checkpoint of all state to the given output stream. * @@ -33,6 +74,32 @@ public interface Checkpointed { */ void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException; + /** + * Restores state from a checkpoint asynchronously. 
+ * @param directory where the checkpoint will be located + * @param executorService to use when running asynchronous tasks + * @return a future to track the progress + */ + default CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("loading {} snapshot started", getCheckpointName()); + File file = new File(directory, getCheckpointName().toString()); + try { + MessageDigest md5 = MD5Hash.getDigester(); + try (CheckpointInputStream is = new OptimizedCheckpointInputStream(file, md5)) { + restoreFromCheckpoint(is); + } + MD5FileUtil.verifySavedMD5(file, new MD5Hash(md5.digest())); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to restore snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("loading {} snapshot finished", getCheckpointName()); + }, executorService); + } + /** * Restores state from a checkpoint. * diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java new file mode 100644 index 000000000000..a29e4390c003 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointInputStream.java @@ -0,0 +1,38 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.checkpoint; + +import net.jpountz.lz4.LZ4FrameInputStream; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.security.DigestInputStream; +import java.security.MessageDigest; + +/** + * InputStream to read checkpoint files efficiently. + */ +public class OptimizedCheckpointInputStream extends CheckpointInputStream { + + /** + * @param file where the checkpoint will be read from + * @param digest that verifies the file has not been corrupted + * @throws IOException propagates wrapped input stream exceptions + */ + public OptimizedCheckpointInputStream(File file, MessageDigest digest) throws IOException { + super(new DigestInputStream(new LZ4FrameInputStream( + new BufferedInputStream(Files.newInputStream(file.toPath()), + OptimizedCheckpointOutputStream.BUFFER_SIZE)), digest)); + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java new file mode 100644 index 000000000000..f010e020eb08 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/checkpoint/OptimizedCheckpointOutputStream.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.checkpoint; + +import alluxio.util.FormatUtils; + +import net.jpountz.lz4.LZ4FrameOutputStream; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.security.DigestOutputStream; +import java.security.MessageDigest; + +/** + * OutputStream to write checkpoint files efficiently. + */ +public class OptimizedCheckpointOutputStream extends OutputStream { + public static final int BUFFER_SIZE = (int) FormatUtils.parseSpaceSize("4MB"); + + private final OutputStream mOutputStream; + + /** + * @param file where the checkpoint will be written + * @param digest to ensure uncorrupted data + * @throws IOException propagates wrapped output stream exceptions + */ + public OptimizedCheckpointOutputStream(File file, MessageDigest digest) throws IOException { + this(file, digest, BUFFER_SIZE); + } + + /** + * Constructor used for benchmarking. + * @param file where the checkpoint will be written + * @param digest to ensure uncorrupted data + * @param bufferSize the buffer size that the output stream should use + */ + public OptimizedCheckpointOutputStream(File file, MessageDigest digest, int bufferSize) + throws IOException { + mOutputStream = new DigestOutputStream(new LZ4FrameOutputStream( + new BufferedOutputStream(Files.newOutputStream(file.toPath()), bufferSize)), + digest); + } + + @Override + public void write(int b) throws IOException { + mOutputStream.write(b); + } + + @Override + public void close() throws IOException { + mOutputStream.close(); + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java b/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java index 5088146a554a..f8f57ac0f697 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java +++ 
b/core/server/common/src/main/java/alluxio/master/journal/raft/JournalStateMachine.java @@ -24,22 +24,19 @@ import alluxio.master.journal.CatchupFuture; import alluxio.master.journal.JournalUtils; import alluxio.master.journal.Journaled; +import alluxio.master.journal.SingleEntryJournaled; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.LockResource; -import alluxio.util.FormatUtils; -import alluxio.util.LogUtils; import alluxio.util.StreamUtils; import alluxio.util.logging.SamplingLogger; import com.codahale.metrics.Timer; import com.google.common.base.Preconditions; -import org.apache.ratis.io.MD5Hash; import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeerId; @@ -52,19 +49,17 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; import org.apache.ratis.util.LifeCycle; -import org.apache.ratis.util.MD5FileUtil; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.DataInputStream; -import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Map; @@ -72,10 +67,9 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import 
java.util.concurrent.ExecutorService; -import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Stream; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; @@ -101,7 +95,8 @@ public class JournalStateMachine extends BaseStateMachine { /** Journals managed by this applier. */ private final Map mJournals; private final RaftJournalSystem mJournalSystem; - private final SnapshotReplicationManager mSnapshotManager; + private final RaftSnapshotManager mSnapshotManager; + private final SnapshotDirStateMachineStorage mStorage; private final AtomicReference mStateLockManagerRef = new AtomicReference<>(null); @GuardedBy("this") @@ -109,10 +104,6 @@ public class JournalStateMachine extends BaseStateMachine { @GuardedBy("this") private boolean mClosed = false; - private final Lock mGroupLock = new ReentrantLock(); - @GuardedBy("mGroupLock") - private boolean mServerClosing = false; - private volatile long mLastAppliedCommitIndex = -1; // The last special "primary start" sequence number applied to this state machine. These special // sequence numbers are identified by being negative. 
@@ -121,7 +112,7 @@ public class JournalStateMachine extends BaseStateMachine { private volatile boolean mSnapshotting = false; private volatile boolean mIsLeader = false; - private final ExecutorService mJournalPool; + private final ExecutorService mJournalPool = Executors.newCachedThreadPool(); /** * This callback is used for interrupting someone who suspends the journal applier to work on @@ -142,31 +133,34 @@ public class JournalStateMachine extends BaseStateMachine { // The last index of the latest journal snapshot // created by this master or downloaded from other masters private volatile long mSnapshotLastIndex = -1; + private long mLastSnapshotTime = -1; + @SuppressFBWarnings(value = "IS2_INCONSISTENT_SYNC", + justification = "Written in synchronized block, read by metrics") + private long mLastSnapshotDurationMs = -1; + @SuppressFBWarnings(value = "IS2_INCONSISTENT_SYNC", + justification = "Written in synchronized block, read by metrics") + private long mLastSnapshotEntriesCount = -1; + private long mLastSnapshotReplayDurationMs = -1; + private long mLastSnapshotReplayEntriesCount = -1; /** Used to control applying to masters. 
*/ private BufferedJournalApplier mJournalApplier; - private final SimpleStateMachineStorage mStorage = new SimpleStateMachineStorage(); - private RaftGroupId mRaftGroupId; - private RaftServer mServer; - private long mLastCheckPointTime = -1; /** * @param journals master journals; these journals are still owned by the caller, not by the * journal state machine * @param journalSystem the raft journal system + * @param storage the {@link SnapshotDirStateMachineStorage} that this state machine will use */ - public JournalStateMachine(Map journals, RaftJournalSystem journalSystem) { - int maxConcurrencyPoolSize = - Configuration.getInt(PropertyKey.MASTER_JOURNAL_LOG_CONCURRENCY_MAX); - mJournalPool = new ForkJoinPool(maxConcurrencyPoolSize); - LOG.info("Ihe max concurrency for notifyTermIndexUpdated is loading with max threads {}", - maxConcurrencyPoolSize); + public JournalStateMachine(Map journals, RaftJournalSystem journalSystem, + SnapshotDirStateMachineStorage storage) { mJournals = journals; mJournalApplier = new BufferedJournalApplier(journals, () -> journalSystem.getJournalSinks(null)); resetState(); LOG.info("Initialized new journal state machine"); mJournalSystem = journalSystem; - mSnapshotManager = new SnapshotReplicationManager(journalSystem, mStorage); + mStorage = storage; + mSnapshotManager = new RaftSnapshotManager(mStorage, mJournalPool); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_LAST_INDEX.getName(), @@ -176,7 +170,7 @@ public JournalStateMachine(Map journals, RaftJournalSystem () -> getLastAppliedTermIndex().getIndex() - mSnapshotLastIndex); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName(), - () -> mLastCheckPointTime); + () -> mLastSnapshotTime); MetricsSystem.registerGaugeIfAbsent( MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName(), () -> mLastAppliedCommitIndex); @@ -184,9 +178,21 @@ public JournalStateMachine(Map journals, RaftJournalSystem 
MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName(), () -> getLastAppliedTermIndex().getIndex() - mSnapshotLastIndex > Configuration.getInt(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES) - && System.currentTimeMillis() - mLastCheckPointTime > Configuration.getMs( + && System.currentTimeMillis() - mLastSnapshotTime > Configuration.getMs( PropertyKey.MASTER_WEB_JOURNAL_CHECKPOINT_WARNING_THRESHOLD_TIME) ); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DURATION_MS.getName(), + () -> mLastSnapshotDurationMs); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_ENTRIES_COUNT.getName(), + () -> mLastSnapshotEntriesCount); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_DURATION_MS.getName(), + () -> mLastSnapshotReplayDurationMs); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_REPLAY_ENTRIES_COUNT.getName(), + () -> mLastSnapshotReplayEntriesCount); } @Override @@ -194,13 +200,8 @@ public void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) throws IOException { getLifeCycle().startAndTransition(() -> { super.initialize(server, groupId, raftStorage); - mServer = server; - mRaftGroupId = groupId; mStorage.init(raftStorage); - loadSnapshot(mStorage.getLatestSnapshot()); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } + loadSnapshot(getLatestSnapshot()); }); } @@ -208,58 +209,27 @@ public void initialize(RaftServer server, RaftGroupId groupId, public void reinitialize() throws IOException { LOG.info("Reinitializing state machine."); mStorage.loadLatestSnapshot(); - loadSnapshot(mStorage.getLatestSnapshot()); + loadSnapshot(getLatestSnapshot()); unpause(); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } - private synchronized void loadSnapshot(SingleFileSnapshotInfo snapshot) throws IOException { + private synchronized void 
loadSnapshot(SnapshotInfo snapshot) throws IOException { if (snapshot == null) { LOG.info("No snapshot to load"); return; } - LOG.info("Loading Snapshot {}", snapshot); - final File snapshotFile = snapshot.getFile().getPath().toFile(); - if (!snapshotFile.exists()) { - throw new FileNotFoundException( - String.format("The snapshot file %s does not exist", snapshotFile.getPath())); - } try { resetState(); setLastAppliedTermIndex(snapshot.getTermIndex()); - install(snapshotFile); + LOG.debug("Loading snapshot {}", snapshot); + install(snapshot); + LOG.debug("Finished loading snapshot {}", snapshot); mSnapshotLastIndex = getLatestSnapshot() != null ? getLatestSnapshot().getIndex() : -1; - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } catch (Exception e) { throw new IOException(String.format("Failed to load snapshot %s", snapshot), e); } } - /** - * Called by {@link RaftJournalSystem} stop internal method before - * shutting down the raft server to prevent a deadlock on - * the lock in RaftServerProxy. - */ - protected void setServerClosing() { - try (LockResource ignored = new LockResource(mGroupLock)) { - mServerClosing = true; - } - } - - /** - * Called by {@link RaftJournalSystem} stop internal method after - * shutting down the raft server. - */ - protected void afterServerClosing() { - try (LockResource ignored = new LockResource(mGroupLock)) { - mServerClosing = false; - } - } - /** * Allows leader to take snapshots. This is used exclusively for the * `bin/alluxio fsadmin journal checkpoint` command. 
@@ -278,48 +248,32 @@ public long takeSnapshot() { long index; StateLockManager stateLockManager = mStateLockManagerRef.get(); if (!mIsLeader) { + LOG.info("Taking local snapshot as follower"); index = takeLocalSnapshot(false); } else if (stateLockManager != null) { // the leader has been allowed to take a local snapshot by being given a non-null // StateLockManager through the #allowLeaderSnapshots method try (LockResource stateLock = stateLockManager.lockExclusive(StateLockOptions.defaults())) { + LOG.info("Taking local snapshot as leader"); index = takeLocalSnapshot(true); } catch (Exception e) { return RaftLog.INVALID_LOG_INDEX; } } else { - RaftGroup group; - try (LockResource ignored = new LockResource(mGroupLock)) { - if (mServerClosing) { - return RaftLog.INVALID_LOG_INDEX; - } - // These calls are protected by mGroupLock and mServerClosing - // as they will access the lock in RaftServerProxy.java - // which is also accessed during raft server shutdown which - // can cause a deadlock as the shutdown takes the lock while - // waiting for this thread to finish - Preconditions.checkState(mServer.getGroups().iterator().hasNext()); - group = mServer.getGroups().iterator().next(); - } catch (IOException e) { - SAMPLING_LOG.warn("Failed to get raft group info: {}", e.getMessage()); - return RaftLog.INVALID_LOG_INDEX; - } - if (group.getPeers().size() < 2) { - SAMPLING_LOG.warn("No follower to perform delegated snapshot. 
Please add more masters to " - + "the quorum or manually take snapshot using 'alluxio fsadmin journal checkpoint'"); - return RaftLog.INVALID_LOG_INDEX; - } else { - index = mSnapshotManager.maybeCopySnapshotFromFollower(); - } + index = mSnapshotManager.downloadSnapshotFromOtherMasters(); } // update metrics if took a snapshot if (index != RaftLog.INVALID_LOG_INDEX) { mSnapshotLastIndex = index; - mLastCheckPointTime = System.currentTimeMillis(); + mLastSnapshotTime = System.currentTimeMillis(); + LOG.info("Took snapshot up to index {} at time {}", mSnapshotLastIndex, DateTime.now()); } return index; } + /** + * @return the latest snapshot information, or null of no snapshot exists + */ @Override public SnapshotInfo getLatestSnapshot() { return mStorage.getLatestSnapshot(); @@ -337,14 +291,6 @@ public CompletableFuture query(Message request) { JournalQueryRequest queryRequest = JournalQueryRequest.parseFrom( request.getContent().asReadOnlyByteBuffer()); LOG.debug("Received query request: {}", queryRequest); - // give snapshot manager a chance to handle snapshot related requests - Message reply = mSnapshotManager.handleRequest(queryRequest); - if (reply != null) { - future.complete(reply); - return future; - } - // Snapshot manager returned null indicating the request is not handled. Check and handle - // other type of requests. 
if (queryRequest.hasAddQuorumServerRequest()) { AddQuorumServerRequest addRequest = queryRequest.getAddQuorumServerRequest(); return CompletableFuture.supplyAsync(() -> { @@ -355,13 +301,14 @@ public CompletableFuture query(Message request) { } return Message.EMPTY; }); + } else { + return super.query(request); } } catch (Exception e) { LOG.error("failed processing request {}", request, e); future.completeExceptionally(e); return future; } - return super.query(request); } @Override @@ -372,9 +319,7 @@ public void close() { MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_LAST_CHECKPOINT_TIME.getName()); MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName()); MetricsSystem.removeMetrics(MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName()); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } + mSnapshotManager.close(); } @Override @@ -395,9 +340,6 @@ public CompletableFuture applyTransaction(TransactionContext trx) { public void notifyNotLeader(Collection pendingEntries) { mIsLeader = false; mJournalSystem.notifyLeadershipStateChanged(false); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } @Override @@ -406,38 +348,22 @@ public void notifyConfigurationChanged(long term, long index, CompletableFuture.runAsync(mJournalSystem::updateGroup, mJournalPool); } - private long getNextIndex() { - try { - return mServer.getDivision(mRaftGroupId).getRaftLog().getNextIndex(); - } catch (IOException e) { - throw new IllegalStateException("Cannot obtain raft log index", e); - } - } - @Override public CompletableFuture notifyInstallSnapshotFromLeader( RaftProtos.RoleInfoProto roleInfoProto, TermIndex firstTermIndexInLog) { - if (roleInfoProto.getRole() != RaftProtos.RaftPeerRole.FOLLOWER) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException(String.format( - "Server should be a follower when installing a snapshot from leader. 
Actual: %s", - roleInfoProto.getRole()))); - } - return mSnapshotManager.installSnapshotFromLeader().thenApply(snapshotIndex -> { - long latestJournalIndex = getNextIndex() - 1; - if (latestJournalIndex >= snapshotIndex.getIndex()) { - // do not reload the state machine if the downloaded snapshot is older than the latest entry - // fail the request after installation so the leader will stop sending the same request - throw new IllegalArgumentException( - String.format("Downloaded snapshot index %d is older than the latest entry index %d", - snapshotIndex.getIndex(), latestJournalIndex)); - } - mSnapshotLastIndex = snapshotIndex.getIndex(); - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); + // this method is called automatically by Ratis when the leader does not have all the logs to + // give to this follower. This method instructs the follower to download a snapshot from + // other masters to become up-to-date. + LOG.info("Received instruction to install snapshot from other master asynchronously"); + return CompletableFuture.supplyAsync(() -> { + mSnapshotManager.downloadSnapshotFromOtherMasters(); + long index = mSnapshotManager.waitForAttemptToComplete(); + if (index == RaftLog.INVALID_LOG_INDEX) { + LOG.info("Failed to install snapshot from other master asynchronously"); + return null; } - return snapshotIndex; - }); + return getLatestSnapshot().getTermIndex(); + }, mJournalPool); } @Override @@ -571,9 +497,10 @@ public synchronized long takeLocalSnapshot(boolean hasStateLock) { SAMPLING_LOG.info("Skip taking snapshot because state machine is closed."); return RaftLog.INVALID_LOG_INDEX; } - if (mServer.getLifeCycleState() != LifeCycle.State.RUNNING) { + RaftServer server = getServer().join(); // gets completed during initialization + if (server.getLifeCycleState() != LifeCycle.State.RUNNING) { SAMPLING_LOG.info("Skip taking snapshot because raft server is not in running state: " - + "current state is {}.", mServer.getLifeCycleState()); + + 
"current state is {}.", server.getLifeCycleState()); return RaftLog.INVALID_LOG_INDEX; } if (mJournalApplier.isSuspended()) { @@ -590,66 +517,37 @@ public synchronized long takeLocalSnapshot(boolean hasStateLock) { LOG.debug("Calling snapshot"); Preconditions.checkState(!mSnapshotting, "Cannot call snapshot multiple times concurrently"); mSnapshotting = true; - try (Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER.getName()).time()) { - // The start time of the most recent snapshot - long lastSnapshotStartTime = System.currentTimeMillis(); + TermIndex last = getLastAppliedTermIndex(); + + File snapshotDir = getSnapshotDir(last.getTerm(), last.getIndex()); + if (!snapshotDir.isDirectory() && !snapshotDir.mkdir()) { + return RaftLog.INVALID_LOG_INDEX; + } + try (Timer.Context ctx = MetricsSystem.timer( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_GENERATE_TIMER.getName()).time()) { + Instant start = Instant.now(); long snapshotId = mNextSequenceNumberToRead - 1; - TermIndex last = getLastAppliedTermIndex(); - File tempFile; - try { - tempFile = RaftJournalUtils.createTempSnapshotFile(mStorage); - } catch (IOException e) { - LogUtils.warnWithException(LOG, "Failed to create temp snapshot file", e); - return RaftLog.INVALID_LOG_INDEX; - } - LOG.info("Taking a snapshot to file {}", tempFile); - final File snapshotFile = mStorage.getSnapshotFile(last.getTerm(), last.getIndex()); - try (DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(tempFile))) { - outputStream.writeLong(snapshotId); - JournalUtils.writeToCheckpoint(outputStream, getStateMachines()); - } catch (Exception e) { - tempFile.delete(); - LogUtils.warnWithException(LOG, - "Failed to write snapshot {} to file {}", snapshotId, tempFile, e); - return RaftLog.INVALID_LOG_INDEX; - } - try { - final MD5Hash digest = MD5FileUtil.computeMd5ForFile(tempFile); - LOG.info("Saving digest for snapshot file {}", snapshotFile); - 
MD5FileUtil.saveMD5File(snapshotFile, digest); - LOG.info("Renaming a snapshot file {} to {}", tempFile, snapshotFile); - if (!tempFile.renameTo(snapshotFile)) { - tempFile.delete(); - LOG.warn("Failed to rename snapshot from {} to {}", tempFile, snapshotFile); - return RaftLog.INVALID_LOG_INDEX; - } - LOG.info("Completed snapshot with size {} up to SN {} in {}ms", - FormatUtils.getSizeFromBytes(snapshotFile.length()), - snapshotId, System.currentTimeMillis() - lastSnapshotStartTime); - } catch (Exception e) { - tempFile.delete(); - LogUtils.warnWithException(LOG, - "Failed to complete snapshot: {} - {}", snapshotId, snapshotFile, e); - return RaftLog.INVALID_LOG_INDEX; - } - try { - mStorage.loadLatestSnapshot(); - } catch (Exception e) { - snapshotFile.delete(); - LogUtils.warnWithException(LOG, "Failed to refresh latest snapshot: {}", snapshotId, e); - return RaftLog.INVALID_LOG_INDEX; - } + SingleEntryJournaled idWriter = new SnapshotIdJournaled(); + idWriter.processJournalEntry(JournalEntry.newBuilder().setSequenceNumber(snapshotId).build()); + CompletableFuture.allOf(Stream.concat(Stream.of(idWriter), getStateMachines().stream()) + .map(journaled -> journaled.writeToCheckpoint(snapshotDir, mJournalPool)) + .toArray(CompletableFuture[]::new)) + .join(); + mStorage.loadLatestSnapshot(); + mStorage.signalNewSnapshot(); + + mLastSnapshotDurationMs = Duration.between(start, Instant.now()).toMillis(); + mLastSnapshotEntriesCount = mNextSequenceNumberToRead; return last.getIndex(); + } catch (Exception e) { + LOG.error("error taking snapshot", e); + return RaftLog.INVALID_LOG_INDEX; } finally { mSnapshotting = false; - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } } } - private void install(File snapshotFile) { + private void install(SnapshotInfo snapshot) { if (mClosed) { return; } @@ -658,14 +556,29 @@ private void install(File snapshotFile) { return; } + File snapshotDir = getSnapshotDir(snapshot.getTerm(), snapshot.getIndex()); long 
snapshotId = 0L; try (Timer.Context ctx = MetricsSystem.timer(MetricKey - .MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLAY_TIMER.getName()).time(); - DataInputStream stream = new DataInputStream(new FileInputStream(snapshotFile))) { - snapshotId = stream.readLong(); - JournalUtils.restoreFromCheckpoint(new CheckpointInputStream(stream), getStateMachines()); + .MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLAY_TIMER.getName()).time()) { + Instant start = Instant.now(); + if (snapshotDir.isFile()) { + LOG.info("Restoring from snapshot {} in old format", snapshot.getTermIndex()); + try (DataInputStream stream = new DataInputStream(new FileInputStream(snapshotDir))) { + snapshotId = stream.readLong(); + JournalUtils.restoreFromCheckpoint(new CheckpointInputStream(stream), getStateMachines()); + } + } else { + SingleEntryJournaled idReader = new SnapshotIdJournaled(); + CompletableFuture.allOf(Stream.concat(Stream.of(idReader), getStateMachines().stream()) + .map(journaled -> journaled.restoreFromCheckpoint(snapshotDir, mJournalPool)) + .toArray(CompletableFuture[]::new)) + .join(); + snapshotId = idReader.getEntry().getSequenceNumber(); + } + mLastSnapshotReplayDurationMs = Duration.between(start, Instant.now()).toMillis(); } catch (Exception e) { - JournalUtils.handleJournalReplayFailure(LOG, e, "Failed to install snapshot: %s", snapshotId); + JournalUtils.handleJournalReplayFailure(LOG, e, "Failed to install snapshot: %s", + snapshot.getTermIndex()); if (Configuration.getBoolean(PropertyKey.MASTER_JOURNAL_TOLERATE_CORRUPTION)) { return; } @@ -676,9 +589,15 @@ private void install(File snapshotFile) { mNextSequenceNumberToRead); } mNextSequenceNumberToRead = snapshotId + 1; + mLastSnapshotReplayEntriesCount = mNextSequenceNumberToRead; LOG.info("Successfully installed snapshot up to SN {}", snapshotId); } + private File getSnapshotDir(long term, long index) { + String dirName = SimpleStateMachineStorage.getSnapshotFileName(term, index); + return new File(mStorage.getSnapshotDir(), 
dirName); + } + /** * Suspends applying to masters. * @@ -795,22 +714,15 @@ public synchronized boolean isSnapshotting() { @Override public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, RaftPeerId raftPeerId) { - if (mRaftGroupId == groupMemberId.getGroupId()) { + if (getGroupId() == groupMemberId.getGroupId()) { mIsLeader = groupMemberId.getPeerId() == raftPeerId; mJournalSystem.notifyLeadershipStateChanged(mIsLeader); } else { LOG.warn("Received notification for unrecognized group {}, current group is {}", - groupMemberId.getGroupId(), mRaftGroupId); + groupMemberId.getGroupId(), getGroupId()); } } - /** - * @return the snapshot replication manager - */ - public SnapshotReplicationManager getSnapshotReplicationManager() { - return mSnapshotManager; - } - /** * @return whether the journal is suspended */ diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java index 4531c847217f..9bbd13273980 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceClient.java @@ -12,28 +12,41 @@ package alluxio.master.journal.raft; import alluxio.AbstractMasterClient; +import alluxio.ClientContext; import alluxio.Constants; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.LatestSnapshotInfoPRequest; import alluxio.grpc.RaftJournalServiceGrpc; import alluxio.grpc.ServiceType; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; import alluxio.master.MasterClientContext; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import 
alluxio.retry.RetryPolicy; -import io.grpc.stub.StreamObserver; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; /** * A client for raft journal service. */ public class RaftJournalServiceClient extends AbstractMasterClient { - private RaftJournalServiceGrpc.RaftJournalServiceStub mClient = null; + private final long mRequestInfoTimeoutMs = + Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT); + + private RaftJournalServiceGrpc.RaftJournalServiceBlockingStub mBlockingClient = null; /** - * @param clientContext master client context + * Create a client that talks to a specific master. + * @param selectionPolicy specifies which master is targeted + * @param retryPolicySupplier the retry policy to use when connecting to another master */ - public RaftJournalServiceClient(MasterClientContext clientContext) { - super(clientContext); + public RaftJournalServiceClient(MasterSelectionPolicy selectionPolicy, + Supplier retryPolicySupplier) { + super(MasterClientContext.newBuilder(ClientContext.create(Configuration.global())).build(), + selectionPolicy, retryPolicySupplier); } @Override @@ -51,28 +64,33 @@ protected long getServiceVersion() { return Constants.RAFT_JOURNAL_SERVICE_VERSION; } + @Override + protected void beforeConnect() { + // the default behavior of this method is to search for the primary master + // in our case we do not care which one is the primary master as MasterSelectionPolicy is + // explicitly specified + } + @Override protected void afterConnect() { - mClient = RaftJournalServiceGrpc.newStub(mChannel); + mBlockingClient = RaftJournalServiceGrpc.newBlockingStub(mChannel); } /** - * Uploads a snapshot. 
- * @param responseObserver the response stream observer - * @return the request stream observer + * @return {@link SnapshotMetadata} from specified master */ - public StreamObserver uploadSnapshot( - StreamObserver responseObserver) { - return mClient.uploadSnapshot(responseObserver); + public SnapshotMetadata requestLatestSnapshotInfo() { + return mBlockingClient.withDeadlineAfter(mRequestInfoTimeoutMs, TimeUnit.MILLISECONDS) + .requestLatestSnapshotInfo(LatestSnapshotInfoPRequest.getDefaultInstance()); } /** - * Downloads a snapshot. - * @param responseObserver the response stream observer - * @return the request stream observer + * Receive snapshot data from specified follower. + * + * @param request the request detailing which file to download + * @return an iterator containing the snapshot data */ - public StreamObserver downloadSnapshot( - StreamObserver responseObserver) { - return mClient.downloadSnapshot(responseObserver); + public Iterator requestLatestSnapshotData(SnapshotMetadata request) { + return mBlockingClient.requestLatestSnapshotData(request); } } diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java index 4291759023d3..8bba26f74dea 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalServiceHandler.java @@ -11,47 +11,173 @@ package alluxio.master.journal.raft; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.LatestSnapshotInfoPRequest; import alluxio.grpc.RaftJournalServiceGrpc; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; 
+import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.util.compression.DirectoryMarshaller; +import com.google.protobuf.ByteString; +import com.google.protobuf.UnsafeByteOperations; +import io.grpc.Context; +import io.grpc.Status; import io.grpc.stub.StreamObserver; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.StateMachineStorage; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.io.OutputStream; +import java.nio.file.Path; +import java.time.Duration; +import java.time.Instant; +import java.util.concurrent.TimeUnit; + /** * RPC handler for raft journal service. */ public class RaftJournalServiceHandler extends RaftJournalServiceGrpc.RaftJournalServiceImplBase { - private static final Logger LOG = - LoggerFactory.getLogger(RaftJournalServiceHandler.class); - private final SnapshotReplicationManager mManager; - private final RaftJournalSystem mRaftJournalSystem; + private static final Logger LOG = LoggerFactory.getLogger(RaftJournalServiceHandler.class); + + private final StateMachineStorage mStateMachineStorage; + private volatile long mLastSnapshotUploadDurationMs = -1; + private volatile long mLastSnapshotUploadSize = -1; + private volatile long mLastSnapshotUploadDiskSize = -1; /** - * @param manager the snapshot replication manager - * @param raftJournalSystem the raft journal system + * @param storage the storage that the state machine uses for its snapshots */ - public RaftJournalServiceHandler( - SnapshotReplicationManager manager, - RaftJournalSystem raftJournalSystem) { - mManager = manager; - mRaftJournalSystem = raftJournalSystem; - LOG.debug("RaftJournalServiceHandler initialized, journal system {}", - mRaftJournalSystem); + public RaftJournalServiceHandler(StateMachineStorage storage) { + mStateMachineStorage = 
storage; + + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DURATION_MS.getName(), + () -> mLastSnapshotUploadDurationMs); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_SIZE.getName(), + () -> mLastSnapshotUploadSize); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_UPLOAD_DISK_SIZE.getName(), + () -> mLastSnapshotUploadDiskSize); } @Override - public StreamObserver uploadSnapshot( - StreamObserver responseObserver) { - return mManager.receiveSnapshotFromFollower(responseObserver); + public void requestLatestSnapshotInfo(LatestSnapshotInfoPRequest request, + StreamObserver responseObserver) { + LOG.info("Received request for latest snapshot info"); + if (Context.current().isCancelled()) { + responseObserver.onError( + Status.CANCELLED.withDescription("Cancelled by client").asRuntimeException()); + return; + } + SnapshotInfo snapshot = mStateMachineStorage.getLatestSnapshot(); + SnapshotMetadata.Builder metadata = SnapshotMetadata.newBuilder(); + if (snapshot == null) { + LOG.info("No snapshot to send"); + metadata.setExists(false); + } else { + LOG.info("Found snapshot {}", snapshot.getTermIndex()); + metadata.setExists(true) + .setSnapshotTerm(snapshot.getTerm()) + .setSnapshotIndex(snapshot.getIndex()); + } + responseObserver.onNext(metadata.build()); + responseObserver.onCompleted(); } @Override - public StreamObserver downloadSnapshot( - StreamObserver responseObserver) { - return mManager.sendSnapshotToFollower(responseObserver); + public void requestLatestSnapshotData(SnapshotMetadata request, + StreamObserver responseObserver) { + TermIndex index = TermIndex.valueOf(request.getSnapshotTerm(), request.getSnapshotIndex()); + LOG.info("Received request for snapshot data {}", index); + if (Context.current().isCancelled()) { + responseObserver.onError( + Status.CANCELLED.withDescription("Cancelled by 
client").asRuntimeException()); + return; + } + + String snapshotDirName = SimpleStateMachineStorage + .getSnapshotFileName(request.getSnapshotTerm(), request.getSnapshotIndex()); + Path snapshotPath = new File(mStateMachineStorage.getSnapshotDir(), snapshotDirName).toPath(); + long totalBytesSent; + long diskSize; + LOG.info("Begin snapshot upload of {}", index); + Instant start = Instant.now(); + try (SnapshotGrpcOutputStream stream = new SnapshotGrpcOutputStream(responseObserver)) { + DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create(); + diskSize = marshaller.write(snapshotPath, stream); + totalBytesSent = stream.totalBytes(); + } catch (Exception e) { + LOG.warn("Failed to upload snapshot {}", index, e); + responseObserver.onError(Status.INTERNAL.withCause(e).asRuntimeException()); + return; + } + responseObserver.onCompleted(); + // update last duration and duration timer metrics + mLastSnapshotUploadDurationMs = Duration.between(start, Instant.now()).toMillis(); + MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_TIMER.getName()) + .update(mLastSnapshotUploadDurationMs, TimeUnit.MILLISECONDS); + LOG.info("Total milliseconds to upload {}: {}", index, mLastSnapshotUploadDurationMs); + // update uncompressed snapshot size metric + mLastSnapshotUploadDiskSize = diskSize; + MetricsSystem.histogram( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_DISK_HISTOGRAM.getName()) + .update(mLastSnapshotUploadDiskSize); + LOG.info("Total snapshot uncompressed bytes for {}: {}", index, mLastSnapshotUploadDiskSize); + // update compressed snapshot size (aka size sent over the network) + mLastSnapshotUploadSize = totalBytesSent; + MetricsSystem.histogram(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_UPLOAD_HISTOGRAM.getName()) + .update(mLastSnapshotUploadSize); + LOG.info("Total bytes sent for {}: {}", index, mLastSnapshotUploadSize); + LOG.info("Uploaded snapshot {}", index); + } + + static class SnapshotGrpcOutputStream extends 
OutputStream { + private final int mSnapshotReplicationChunkSize = (int) Configuration.getBytes( + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE); + private final StreamObserver mObserver; + private long mTotalBytesSent = 0; + private byte[] mBuffer = new byte[mSnapshotReplicationChunkSize]; + private int mBufferPosition = 0; + + public SnapshotGrpcOutputStream(StreamObserver responseObserver) { + mObserver = responseObserver; + } + + @Override + public void write(int b) { + mBuffer[mBufferPosition++] = (byte) b; + if (mBufferPosition == mBuffer.length) { + flushBuffer(); + } + } + + @Override + public void close() { + if (mBufferPosition > 0) { + flushBuffer(); + } + } + + private void flushBuffer() { + // avoids copy + ByteString bytes = UnsafeByteOperations.unsafeWrap(mBuffer, 0, mBufferPosition); + mBuffer = new byte[mSnapshotReplicationChunkSize]; + LOG.debug("Sending chunk of size {}: {}", mBufferPosition, bytes); + mObserver.onNext(SnapshotData.newBuilder().setChunk(bytes).build()); + mTotalBytesSent += mBufferPosition; + mBufferPosition = 0; + } + + public long totalBytes() { + return mTotalBytesSent + mBufferPosition; + } } } diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java index 2fffe155726e..37ecc043c3cc 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java @@ -209,6 +209,12 @@ public class RaftJournalSystem extends AbstractJournalSystem { * and installing snapshots. */ private JournalStateMachine mStateMachine; + + /** + * Serves as the storage object for the above state machine. + */ + private final SnapshotDirStateMachineStorage mStateMachineStorage = + new SnapshotDirStateMachineStorage(); /** * Ratis server. 
*/ @@ -294,7 +300,7 @@ private synchronized void initServer() throws IOException { if (mStateMachine != null) { mStateMachine.close(); } - mStateMachine = new JournalStateMachine(mJournals, this); + mStateMachine = new JournalStateMachine(mJournals, this, mStateMachineStorage); RaftProperties properties = new RaftProperties(); Parameters parameters = new Parameters(); @@ -373,7 +379,7 @@ private synchronized void initServer() throws IOException { TimeUnit.MILLISECONDS)); // snapshot retention - RaftServerConfigKeys.Snapshot.setRetentionFileNum(properties, 3); + RaftServerConfigKeys.Snapshot.setRetentionFileNum(properties, 2); // unsafe flush RaftServerConfigKeys.Log.setUnsafeFlushEnabled(properties, @@ -656,7 +662,7 @@ public synchronized void checkpoint(StateLockManager stateLockManager) throws IO public synchronized Map getJournalServices() { Map services = new HashMap<>(); services.put(alluxio.grpc.ServiceType.RAFT_JOURNAL_SERVICE, new GrpcService( - new RaftJournalServiceHandler(mStateMachine.getSnapshotReplicationManager(), this))); + new RaftJournalServiceHandler(mStateMachineStorage))); return services; } @@ -834,13 +840,10 @@ public synchronized void stopInternal() { if (mRaftJournalWriter != null) { mRaftJournalWriter.close(); } - mStateMachine.setServerClosing(); try { mServer.close(); } catch (IOException e) { throw new RuntimeException("Failed to shut down Raft server", e); - } finally { - mStateMachine.afterServerClosing(); } LOG.info("Journal shutdown complete"); } diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java new file mode 100644 index 000000000000..7bd6980e99bb --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftSnapshotManager.java @@ -0,0 +1,313 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.AbstractClient; +import alluxio.Constants; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.SnapshotData; +import alluxio.grpc.SnapshotMetadata; +import alluxio.master.selectionpolicy.MasterSelectionPolicy; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.retry.ExponentialBackoffRetry; +import alluxio.retry.RetryPolicy; +import alluxio.util.ConfigurationUtils; +import alluxio.util.compression.DirectoryMarshaller; +import alluxio.util.logging.SamplingLogger; +import alluxio.util.network.NetworkAddressUtils; + +import com.codahale.metrics.Timer; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.nio.ByteBuffer; +import java.time.Duration; +import java.time.Instant; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import 
java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + +/** + * Manages a snapshot download. + */ +public class RaftSnapshotManager implements AutoCloseable { + private static final Logger LOG = LoggerFactory.getLogger(RaftSnapshotManager.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 10L * Constants.SECOND_MS); + + private final int mRequestInfoTimeout = (int) + Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT); + + private final SnapshotDirStateMachineStorage mStorage; + private final ExecutorService mExecutor; + private final Map mClients; + + private volatile long mLastSnapshotDownloadDurationMs = -1; + private volatile long mLastSnapshotDownloadSize = -1; + private volatile long mLastSnapshotDownloadDiskSize = -1; + + @Nullable + private CompletableFuture mDownloadFuture = null; + + RaftSnapshotManager(SnapshotDirStateMachineStorage storage, ExecutorService executor) { + mStorage = storage; + mExecutor = executor; + + InetSocketAddress localAddress = NetworkAddressUtils.getConnectAddress( + NetworkAddressUtils.ServiceType.MASTER_RPC, Configuration.global()); + mClients = ConfigurationUtils.getMasterRpcAddresses(Configuration.global()).stream() + .filter(address -> !address.equals(localAddress)) + .collect(Collectors.toMap(Function.identity(), address -> { + MasterSelectionPolicy selection = MasterSelectionPolicy.Factory.specifiedMaster(address); + int numTries = 10; + int sleep = Math.max(1, mRequestInfoTimeout / numTries); + // spread the connection attempts to the other master evenly over the request info timeout + Supplier retry = () -> new ExponentialBackoffRetry(sleep, sleep, numTries); + return new RaftJournalServiceClient(selection, retry); + })); + + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DURATION_MS.getName(), + () -> mLastSnapshotDownloadDurationMs); + MetricsSystem.registerGaugeIfAbsent( + 
MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_SIZE.getName(), + () -> mLastSnapshotDownloadSize); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_EMBEDDED_JOURNAL_LAST_SNAPSHOT_DOWNLOAD_DISK_SIZE.getName(), + () -> mLastSnapshotDownloadDiskSize); + } + + /** + * Waits synchronously for the download attempt to be complete. + * @return the result of the download attempt, or {@link RaftLog#INVALID_LOG_INDEX} if no + * attempt is underway + */ + public long waitForAttemptToComplete() { + if (mDownloadFuture == null) { + return RaftLog.INVALID_LOG_INDEX; + } + mDownloadFuture.join(); + // this is to make sure that mDownloadFuture gets reset to null + return downloadSnapshotFromOtherMasters(); + } + + /** + * Launches an asynchronous download of the most updated snapshot found on other masters in the + * cluster. If the asynchronous download is already in flight, it polls for the results. + * @return the log index of the last successful snapshot installation, or -1 if the download is in + * flight or has failed. 
+ */ + public long downloadSnapshotFromOtherMasters() { + if (mClients.isEmpty()) { + SAMPLING_LOG.warn("No followers are present to download a snapshot from"); + return RaftLog.INVALID_LOG_INDEX; + } + if (mDownloadFuture == null) { + mDownloadFuture = CompletableFuture.supplyAsync(this::core, mExecutor).exceptionally(err -> { + LOG.debug("Failed to download snapshot", err); + return RaftLog.INVALID_LOG_INDEX; + }); + } else if (mDownloadFuture.isDone()) { + LOG.debug("Download operation is done"); + Long snapshotIndex = mDownloadFuture.join(); + LOG.debug("Retrieved downloaded snapshot at index {}", snapshotIndex); + mDownloadFuture = null; + return snapshotIndex; + } + return RaftLog.INVALID_LOG_INDEX; + } + + private long core() { + SnapshotInfo localSnapshotInfo = mStorage.getLatestSnapshot(); + if (localSnapshotInfo == null) { + LOG.info("No local snapshot found"); + } else { + LOG.info("Local snapshot is {}", TermIndex.valueOf(localSnapshotInfo.getTerm(), + localSnapshotInfo.getIndex())); + } + // max heap based on TermIndex extracted from the SnapshotMetadata of each pair + PriorityQueue> otherInfos = + new PriorityQueue<>(Math.max(1, mClients.size()), + Collections.reverseOrder(Comparator.comparing(pair -> toTermIndex(pair.getLeft())))); + // wait mRequestInfoTimeout between each attempt to contact the masters + RetryPolicy retryPolicy = + new ExponentialBackoffRetry(mRequestInfoTimeout, mRequestInfoTimeout, 10); + while (otherInfos.isEmpty() && retryPolicy.attempt()) { + LOG.debug("Attempt to retrieve info"); + otherInfos.addAll(retrieveFollowerInfos(localSnapshotInfo)); + LOG.debug("Attempt to retrieve info over"); + } + + while (!otherInfos.isEmpty()) { + ImmutablePair info = otherInfos.poll(); + InetSocketAddress address = info.getRight(); + SnapshotMetadata snapshotMetadata = info.getLeft(); + long index; + if ((index = downloadSnapshotFromAddress(snapshotMetadata, address)) + != RaftLog.INVALID_LOG_INDEX) { + return index; + } + } + return 
RaftLog.INVALID_LOG_INDEX; + } + + /** + * @param localSnapshotInfo contains information about the most up-to-date snapshot on this master + * @return a list of pairs containing a follower's most up-to-date snapshot and its address + */ + private List> retrieveFollowerInfos( + SnapshotInfo localSnapshotInfo) { + return mClients.keySet().parallelStream() + // map to a pair of (SnapshotMetadata, address) by requesting all followers in parallel + .map(address -> { + RaftJournalServiceClient client = mClients.get(address); + try { + client.connect(); + LOG.info("Receiving snapshot info from {}", address); + SnapshotMetadata metadata = client.requestLatestSnapshotInfo(); + if (!metadata.getExists()) { + LOG.info("No snapshot is present on {}", address); + } else { + LOG.info("Received snapshot info {} from {}", toTermIndex(metadata), address); + } + return ImmutablePair.of(metadata, address); + } catch (Exception e) { + client.disconnect(); + LOG.debug("Failed to retrieve snapshot info from {}", address, e); + return ImmutablePair.of(SnapshotMetadata.newBuilder().setExists(false).build(), + address); + } + }) + // filter out followers that have no snapshot, or none newer than the local one + .filter(pair -> pair.getLeft().getExists() && (localSnapshotInfo == null + || localSnapshotInfo.getTermIndex().compareTo(toTermIndex(pair.getLeft())) < 0)) + .collect(Collectors.toList()); + } + + /** + * Retrieves snapshot from the specified address. 
+ * @param snapshotMetadata helps identify which snapshot is desired + * @param address where to retrieve it from + * @return the index of the snapshot taken + */ + private long downloadSnapshotFromAddress(SnapshotMetadata snapshotMetadata, + InetSocketAddress address) { + TermIndex index = toTermIndex(snapshotMetadata); + LOG.info("Retrieving snapshot {} from {}", index, address); + Instant start = Instant.now(); + RaftJournalServiceClient client = mClients.get(address); + try { + client.connect(); + Iterator it = client.requestLatestSnapshotData(snapshotMetadata); + long totalBytesRead; + long snapshotDiskSize; + try (SnapshotGrpcInputStream stream = new SnapshotGrpcInputStream(it)) { + DirectoryMarshaller marshaller = DirectoryMarshaller.Factory.create(); + snapshotDiskSize = marshaller.read(mStorage.getTmpDir().toPath(), stream); + totalBytesRead = stream.totalBytes(); + } + + File finalSnapshotDestination = new File(mStorage.getSnapshotDir(), + SimpleStateMachineStorage.getSnapshotFileName(snapshotMetadata.getSnapshotTerm(), + snapshotMetadata.getSnapshotIndex())); + FileUtils.moveDirectory(mStorage.getTmpDir(), finalSnapshotDestination); + // update last duration and duration timer metrics + mLastSnapshotDownloadDurationMs = Duration.between(start, Instant.now()).toMillis(); + MetricsSystem.timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName()) + .update(mLastSnapshotDownloadDurationMs, TimeUnit.MILLISECONDS); + LOG.info("Total milliseconds to download {}: {}", index, mLastSnapshotDownloadDurationMs); + // update uncompressed snapshot size metric + mLastSnapshotDownloadDiskSize = snapshotDiskSize; + MetricsSystem.histogram( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_DISK_HISTOGRAM.getName()) + .update(mLastSnapshotDownloadDiskSize); + LOG.info("Total extracted bytes of snapshot {}: {}", index, mLastSnapshotDownloadDiskSize); + // update compressed snapshot size (aka size sent over the network) + mLastSnapshotDownloadSize = 
totalBytesRead; + MetricsSystem.histogram( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_HISTOGRAM.getName()) + .update(mLastSnapshotDownloadSize); + LOG.info("Total bytes read from {} for {}: {}", address, index, mLastSnapshotDownloadSize); + try (Timer.Context ctx = MetricsSystem.timer( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER.getName()).time()) { + mStorage.loadLatestSnapshot(); + mStorage.signalNewSnapshot(); + } + LOG.info("Retrieved snapshot {} from {}", index, address); + return snapshotMetadata.getSnapshotIndex(); + } catch (Exception e) { + client.disconnect(); + LOG.warn("Failed to download snapshot {} from {}", index, address); + LOG.debug("Download failure error", e); + return RaftLog.INVALID_LOG_INDEX; + } finally { + FileUtils.deleteQuietly(mStorage.getTmpDir()); + } + } + + @Override + public void close() { + mClients.values().forEach(AbstractClient::close); + } + + private TermIndex toTermIndex(SnapshotMetadata metadata) { + return TermIndex.valueOf(metadata.getSnapshotTerm(), metadata.getSnapshotIndex()); + } + + static class SnapshotGrpcInputStream extends InputStream { + private final Iterator mIt; + private long mTotalBytesRead = 0; + // using a read-only ByteBuffer avoids array copy + private ByteBuffer mCurrentBuffer = ByteBuffer.allocate(0); + + public SnapshotGrpcInputStream(Iterator iterator) { + mIt = iterator; + } + + @Override + public int read() { + if (!mCurrentBuffer.hasRemaining()) { + if (!mIt.hasNext()) { + return -1; + } + mCurrentBuffer = mIt.next().getChunk().asReadOnlyByteBuffer(); + LOG.debug("Received chunk of size {}: {}", mCurrentBuffer.capacity(), mCurrentBuffer); + mTotalBytesRead += mCurrentBuffer.capacity(); + } + return Byte.toUnsignedInt(mCurrentBuffer.get()); + } + + public long totalBytes() { + return mTotalBytesRead; + } + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java 
b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java new file mode 100644 index 000000000000..1fcc21826971 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorage.java @@ -0,0 +1,162 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.annotation.SuppressFBWarnings; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.NotFileFilter; +import org.apache.commons.io.filefilter.SuffixFileFilter; +import org.apache.commons.io.filefilter.TrueFileFilter; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.server.storage.FileInfo; +import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.SnapshotRetentionPolicy; +import org.apache.ratis.statemachine.StateMachineStorage; +import org.apache.ratis.statemachine.impl.FileListSnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.apache.ratis.util.MD5FileUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import 
java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + * Simple state machine storage that can handle directories. + */ +public class SnapshotDirStateMachineStorage implements StateMachineStorage { + private static final Logger LOG = LoggerFactory.getLogger(SnapshotDirStateMachineStorage.class); + + private RaftStorage mStorage; + @Nullable + private volatile SnapshotInfo mLatestSnapshotInfo = null; + private volatile boolean mNewSnapshotTaken = false; + + private final Comparator mSnapshotPathComparator = Comparator.comparing( + path -> SimpleStateMachineStorage.getTermIndexFromSnapshotFile(path.toFile())); + + /** + * @param path to evaluate + * @return a matcher to evaluate if the leaf of the provided path has a name that matches the + * pattern of snapshot directories + */ + @SuppressFBWarnings(value = "NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE", + justification = "argument 'path' is never null, and method 'matcher' returns NotNull") + public static Matcher matchSnapshotPath(Path path) { + return SimpleStateMachineStorage.SNAPSHOT_REGEX.matcher(path.getFileName().toString()); + } + + @Override + public void init(RaftStorage raftStorage) throws IOException { + mStorage = raftStorage; + loadLatestSnapshot(); + } + + private SnapshotInfo findLatestSnapshot() { + try (Stream stream = Files.list(getSnapshotDir().toPath())) { + Optional max = stream + .filter(path -> matchSnapshotPath(path).matches()) + .max(mSnapshotPathComparator); + if (max.isPresent()) { + TermIndex ti = SimpleStateMachineStorage.getTermIndexFromSnapshotFile(max.get().toFile()); + // for backwards compatibility with previous versions of snapshots + if (max.get().toFile().isFile()) { + MD5Hash md5Hash = MD5FileUtil.readStoredMd5ForFile(max.get().toFile()); + FileInfo fileInfo = new FileInfo(max.get(), md5Hash); + return new SingleFileSnapshotInfo(fileInfo, ti.getTerm(), 
ti.getIndex()); + } + // new snapshot format + List fileInfos = new ArrayList<>(); + Collection nonMd5Files = FileUtils.listFiles(max.get().toFile(), + new NotFileFilter(new SuffixFileFilter(MD5FileUtil.MD5_SUFFIX)), + TrueFileFilter.INSTANCE); + for (File file : nonMd5Files) { + MD5Hash md5Hash = MD5FileUtil.readStoredMd5ForFile(file); // null if no md5 file + Path relativePath = max.get().relativize(file.toPath()); + fileInfos.add(new FileInfo(relativePath, md5Hash)); + } + return new FileListSnapshotInfo(fileInfos, ti.getTerm(), ti.getIndex()); + } + } catch (Exception e) { + // Files.list may throw an unchecked exception + // do nothing and return null + LOG.warn("Error reading snapshot directory", e); + } + return null; + } + + /** + * Loads the latest snapshot information into the StateMachineStorage. + */ + public void loadLatestSnapshot() { + mLatestSnapshotInfo = findLatestSnapshot(); + } + + @Override @Nullable + public SnapshotInfo getLatestSnapshot() { + return mLatestSnapshotInfo; + } + + @Override + public void format() throws IOException {} + + /** + * Signal to the StateMachineStorage that a new snapshot was taken. + */ + public void signalNewSnapshot() { + mNewSnapshotTaken = true; + } + + @Override + public void cleanupOldSnapshots(SnapshotRetentionPolicy retentionPolicy) throws IOException { + if (!mNewSnapshotTaken) { + LOG.trace("No new snapshot to delete old one"); + return; + } + mNewSnapshotTaken = false; + try (Stream stream = Files.list(getSnapshotDir().toPath())) { + stream.filter(path -> matchSnapshotPath(path).matches()) + .sorted(Collections.reverseOrder(mSnapshotPathComparator)) + .skip(retentionPolicy.getNumSnapshotsRetained()) + .forEach(path -> { + LOG.debug("removing dir {}", path.getFileName()); + boolean b = FileUtils.deleteQuietly(path.toFile()); + LOG.debug("{}successful deletion", b ? 
"" : "un"); + }); + } + } + + @Override + public File getSnapshotDir() { + return mStorage.getStorageDir().getStateMachineDir(); + } + + @Override + public File getTmpDir() { + return mStorage.getStorageDir().getTmpDir(); + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java deleted file mode 100644 index a632aaef38c9..000000000000 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotDownloader.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.journal.raft; - -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; - -import io.grpc.stub.ClientCallStreamObserver; -import io.grpc.stub.ClientResponseObserver; -import io.grpc.stub.StreamObserver; -import org.apache.ratis.io.MD5Hash; -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.storage.FileInfo; -import org.apache.ratis.statemachine.SnapshotInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.apache.ratis.util.MD5FileUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.concurrent.CompletableFuture; -import java.util.function.Function; - -/** - * A stream observer for downloading a snapshot. - * - * @param type of the message to send - * @param type of the message to receive - */ -public class SnapshotDownloader implements ClientResponseObserver { - private static final Logger LOG = LoggerFactory.getLogger(SnapshotDownloader.class); - - private final SimpleStateMachineStorage mStorage; - private final CompletableFuture mFuture = new CompletableFuture<>(); - private final Function mMessageBuilder; - private final Function mDataGetter; - private final String mSource; - - /** The term and index for the latest journal entry included in the snapshot. */ - private TermIndex mTermIndex; - private File mTempFile; - private FileOutputStream mOutputStream; - private long mBytesWritten = 0; - private StreamObserver mStream; - private SnapshotInfo mSnapshotToInstall; - - /** - * Builds a stream for leader to download a snapshot. 
- * - * @param storage the snapshot storage - * @param stream the response stream - * @param source the source of the snapshot - * @return the download stream for leader - */ - public static SnapshotDownloader forLeader( - SimpleStateMachineStorage storage, StreamObserver stream, - String source) { - return new SnapshotDownloader<>(storage, - offset -> UploadSnapshotPResponse.newBuilder().setOffsetReceived(offset).build(), - UploadSnapshotPRequest::getData, stream, source); - } - - /** - * Builds a stream for follower to download a snapshot. - * - * @param storage the snapshot storage - * @param source the source of the snapshot - * @return the download stream for follower - */ - public static SnapshotDownloader - forFollower(SimpleStateMachineStorage storage, String source) { - return new SnapshotDownloader<>(storage, - offset -> DownloadSnapshotPRequest.newBuilder().setOffsetReceived(offset).build(), - DownloadSnapshotPResponse::getData, null, source); - } - - private SnapshotDownloader(SimpleStateMachineStorage storage, Function messageBuilder, - Function dataGetter, StreamObserver stream, String source) { - mStorage = storage; - mMessageBuilder = messageBuilder; - mDataGetter = dataGetter; - mStream = stream; - mSource = source; - } - - @Override - public void onNext(R response) { - try { - onNextInternal(response); - } catch (Exception e) { - mStream.onError(e); - mFuture.completeExceptionally(e); - cleanup(); - } - } - - private void cleanup() { - if (mOutputStream != null) { - try { - mOutputStream.close(); - } catch (IOException ioException) { - LOG.error("Error closing snapshot file {}", mTempFile, ioException); - } - } - if (mTempFile != null && !mTempFile.delete()) { - LOG.error("Error deleting snapshot file {}", mTempFile.getPath()); - } - } - - private void onNextInternal(R response) throws IOException { - TermIndex termIndex = TermIndex.valueOf( - mDataGetter.apply(response).getSnapshotTerm(), - mDataGetter.apply(response).getSnapshotIndex()); - if 
(mTermIndex == null) { - LOG.info("Downloading new snapshot {} from {}", termIndex, mSource); - mTermIndex = termIndex; - // start a new file - mTempFile = RaftJournalUtils.createTempSnapshotFile(mStorage); - - mTempFile.deleteOnExit(); - mStream.onNext(mMessageBuilder.apply(0L)); - } else { - if (!termIndex.equals(mTermIndex)) { - throw new IOException(String.format( - "Mismatched term index when downloading the snapshot. expected: %s actual: %s", - mTermIndex, termIndex)); - } - if (!mDataGetter.apply(response).hasChunk()) { - throw new IOException(String.format( - "A chunk for file %s is missing from the response %s.", mTempFile, response)); - } - // write the chunk - if (mOutputStream == null) { - LOG.info("Start writing to temporary file {}", mTempFile.getPath()); - mOutputStream = new FileOutputStream(mTempFile); - } - long position = mOutputStream.getChannel().position(); - if (position != mDataGetter.apply(response).getOffset()) { - throw new IOException( - String.format("Mismatched offset in file %d, expect %d, bytes written %d", - position, mDataGetter.apply(response).getOffset(), mBytesWritten)); - } - mOutputStream.write(mDataGetter.apply(response).getChunk().toByteArray()); - mBytesWritten += mDataGetter.apply(response).getChunk().size(); - LOG.debug("Written {} bytes to snapshot file {}", mBytesWritten, mTempFile.getPath()); - if (mDataGetter.apply(response).getEof()) { - LOG.debug("Completed writing to temporary file {} with size {}", - mTempFile.getPath(), mOutputStream.getChannel().position()); - mOutputStream.close(); - mOutputStream = null; - final MD5Hash digest = MD5FileUtil.computeMd5ForFile(mTempFile); - mSnapshotToInstall = new SingleFileSnapshotInfo( - new FileInfo(mTempFile.toPath(), digest), - mTermIndex.getTerm(), mTermIndex.getIndex()); - mFuture.complete(mTermIndex); - LOG.info("Finished copying snapshot to local file {}.", mTempFile); - mStream.onCompleted(); - } else { - mStream.onNext(mMessageBuilder.apply(mBytesWritten)); - } - } - 
} - - @Override - public void onError(Throwable t) { - mFuture.completeExceptionally(t); - cleanup(); - } - - @Override - public void onCompleted() { - if (mOutputStream != null) { - mFuture.completeExceptionally( - new IllegalStateException("Request completed with unfinished upload")); - cleanup(); - } - } - - @Override - public void beforeStart(ClientCallStreamObserver requestStream) { - mStream = requestStream; - } - - /** - * @return a future that tracks when the stream is completed - */ - public CompletableFuture getFuture() { - return mFuture; - } - - /** - * @return the snapshot information if it is downloaded completely, or null otherwise - */ - public SnapshotInfo getSnapshotToInstall() { - return mSnapshotToInstall; - } -} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java new file mode 100644 index 000000000000..fc8457d4b966 --- /dev/null +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotIdJournaled.java @@ -0,0 +1,30 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.journal.raft; + +import alluxio.master.journal.SingleEntryJournaled; +import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.proto.journal.Journal; + +/** + * Simple implementation to write and recover the snapshot ID when checkpointing. 
The snapshot ID + * is a long that represents the sequence number of the last entry that was processed by the + * journal. The snapshot ID will be inserted into and retrieved through the + * {@link Journal.JournalEntry.Builder#setSequenceNumber(long)} and + * {@link Journal.JournalEntry.Builder#getSequenceNumber()} methods, respectively. + */ +public class SnapshotIdJournaled extends SingleEntryJournaled { + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.SNAPSHOT_ID; + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java deleted file mode 100644 index feeb17dcec70..000000000000 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotReplicationManager.java +++ /dev/null @@ -1,620 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.journal.raft; - -import alluxio.ClientContext; -import alluxio.Constants; -import alluxio.collections.Pair; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.status.AbortedException; -import alluxio.exception.status.AlluxioStatusException; -import alluxio.exception.status.NotFoundException; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; -import alluxio.grpc.GetSnapshotInfoRequest; -import alluxio.grpc.GetSnapshotInfoResponse; -import alluxio.grpc.GetSnapshotRequest; -import alluxio.grpc.JournalQueryRequest; -import alluxio.grpc.JournalQueryResponse; -import alluxio.grpc.QuorumServerState; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.SnapshotMetadata; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; -import alluxio.master.MasterClientContext; -import alluxio.metrics.MetricKey; -import alluxio.metrics.MetricsSystem; -import alluxio.resource.LockResource; -import alluxio.security.authentication.ClientContextServerInjector; -import alluxio.util.FormatUtils; -import alluxio.util.LogUtils; -import alluxio.util.logging.SamplingLogger; - -import com.codahale.metrics.Timer; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.protobuf.MessageLite; -import io.grpc.Status; -import io.grpc.stub.StreamObserver; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientReply; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.raftlog.RaftLog; -import org.apache.ratis.server.storage.FileInfo; -import org.apache.ratis.statemachine.SnapshotInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import 
org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; -import org.apache.ratis.util.MD5FileUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.time.Duration; -import java.time.Instant; -import java.util.Map; -import java.util.Objects; -import java.util.PriorityQueue; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * Class for managing snapshot replication among masters. - * It manages two snapshot replication workflows - worker to master and master to worker. - * - * 1. Worker to Master - * When a raft leader needs a snapshot, instead of taking snapshot locally it copies a recent - * snapshot from one of the followers. - * - * Workflow: - * - * - Ratis calls leader state machine to take a snapshot - * - leader gets snapshot metadata from follower - * - leader pick one of the follower and send a request for copying the snapshot - * - follower receives the request and calls the leader raft journal service to upload the snapshot - * - after the upload completes, leader remembers the temporary snapshot location and index - * - Ratis calls the leader state machine again to take a snapshot - * - leader moves the temporary snapshot to the journal snapshot folder and returns snapshot index - * - * 2. 
Master to Worker - * When a raft follower receives a notification to download a snapshot, it downloads the latest - * snapshot from the leader. - * - * Workflow: - * - * - Ratis leader determines one of the follower needs a snapshot because it misses journal entries - * from a long time ago - * - Ratis leader notifies Ratis follower to install a snapshot from leader, the follower calls the - * Alluxio state machine to fulfill this request - * - the follower state machine calls the snapshot manager which calls the raft journal service from - * leader to download a snapshot - * - after the downloads completes, follower moves the file to snapshot directory and gives Ratis - * the snapshot index - */ -public class SnapshotReplicationManager { - private static final Logger LOG = LoggerFactory.getLogger(SnapshotReplicationManager.class); - private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 5L * Constants.SECOND_MS); - - private final SimpleStateMachineStorage mStorage; - private final RaftJournalSystem mJournalSystem; - private volatile SnapshotInfo mDownloadedSnapshot; - private final PriorityQueue> mSnapshotCandidates; - private Future mRequestDataFuture; - private final Lock mRequestDataLock = new ReentrantLock(); - private final Condition mRequestDataCondition = mRequestDataLock.newCondition(); - private final ExecutorService mRequestDataExecutor = Executors.newSingleThreadExecutor(); - - private static final long SNAPSHOT_INFO_TIMEOUT_MS = - Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT); - private static final long SNAPSHOT_DATA_TIMEOUT_MS = - Configuration.getMs(PropertyKey.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT); - - private enum DownloadState { - /** No snapshot download is in progress. */ - IDLE, - - /** Snapshot information is requested from available followers. */ - REQUEST_INFO, - - /** The latest snapshot data is requested from one of the followers. 
*/ - REQUEST_DATA, - - /** The latest snapshot is being downloaded from one of the followers. */ - STREAM_DATA, - - /** A snapshot is downloaded and ready for installation. */ - DOWNLOADED, - - /** A snapshot is being installed to the journal storage. */ - INSTALLING, - } - - private final AtomicReference mDownloadState = - new AtomicReference<>(DownloadState.IDLE); - - /** - * @param journalSystem the raft journal system - * @param storage the snapshot storage - */ - public SnapshotReplicationManager(RaftJournalSystem journalSystem, - SimpleStateMachineStorage storage) { - mStorage = storage; - mJournalSystem = journalSystem; - mSnapshotCandidates = new PriorityQueue<>((pair1, pair2) -> { - SnapshotMetadata first = pair1.getFirst(); - SnapshotMetadata second = pair2.getFirst(); - // deliberately reversing the compare order to have bigger numbers rise to the top - // bigger terms and indexes means a more recent snapshot - if (first.getSnapshotTerm() == second.getSnapshotTerm()) { - return Long.compare(second.getSnapshotIndex(), first.getSnapshotIndex()); - } - return Long.compare(second.getSnapshotTerm(), first.getSnapshotTerm()); - }); - } - - /** - * Downloads and installs a snapshot from the leader. 
- * - * @return a future with the term index of the installed snapshot - */ - public CompletableFuture installSnapshotFromLeader() { - if (mJournalSystem.isLeader()) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException("Abort snapshot installation after becoming a leader")); - } - if (!transitionState(DownloadState.IDLE, DownloadState.STREAM_DATA)) { - return RaftJournalUtils.completeExceptionally( - new IllegalStateException("State is not IDLE when starting a snapshot installation")); - } - try { - RaftJournalServiceClient client = createJournalServiceClient(); - String address = String.valueOf(client.getRemoteSockAddress()); - SnapshotDownloader observer = - SnapshotDownloader.forFollower(mStorage, address); - Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName()).time(); - client.downloadSnapshot(observer); - return observer.getFuture().thenApplyAsync((termIndex) -> { - ctx.close(); - mDownloadedSnapshot = observer.getSnapshotToInstall(); - transitionState(DownloadState.STREAM_DATA, DownloadState.DOWNLOADED); - long index = installDownloadedSnapshot(); - if (index == RaftLog.INVALID_LOG_INDEX) { - throw new CompletionException(new RuntimeException( - String.format("Failed to install the downloaded snapshot %s", termIndex))); - } - if (index != termIndex.getIndex()) { - throw new CompletionException(new IllegalStateException( - String.format("Mismatched snapshot installed - downloaded %d, installed %d", - termIndex.getIndex(), index))); - } - return termIndex; - }).whenComplete((termIndex, throwable) -> { - if (throwable != null) { - LOG.error("Unexpected exception downloading snapshot from leader {}.", address, - throwable); - transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE); - } - client.close(); - }); - } catch (Exception e) { - transitionState(DownloadState.STREAM_DATA, DownloadState.IDLE); - return RaftJournalUtils.completeExceptionally(e); - } - } - - /** - * 
Sends a snapshot to the leader. - * - * @throws IOException if error occurs while initializing the data stream - */ - public void sendSnapshotToLeader() throws IOException { - if (mJournalSystem.isLeader()) { - throw new IllegalStateException("Server is no longer a follower"); - } - LOG.debug("Checking latest snapshot to send"); - SnapshotInfo snapshot = mStorage.getLatestSnapshot(); - if (snapshot == null) { - throw new NotFoundException("No snapshot available"); - } - - SnapshotUploader snapshotUploader = - SnapshotUploader.forFollower(mStorage, snapshot); - RaftJournalServiceClient client = createJournalServiceClient(); - LOG.info("Sending stream request to leader {} for snapshot {}", client.getRemoteSockAddress(), - snapshot.getTermIndex()); - StreamObserver requestObserver = - client.uploadSnapshot(snapshotUploader); - requestObserver.onNext(UploadSnapshotPRequest.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - snapshotUploader.getCompletionFuture().whenComplete((info, t) -> client.close()); - } - - /** - * Attempts to copy a snapshot from one of the followers. - * - * The leader state machine calls this method regularly when it needs a new snapshot. - * To avoid blocking normal journal operations, This method always returns a value immediately - * without waiting for download to finish: - * - * - If no download is in progress, it schedules a new download asynchronously and returns - * {@link RaftLog#INVALID_LOG_INDEX}. - * - If a download is in progress, it returns {@link RaftLog#INVALID_LOG_INDEX} immediately. - * - If a download is completed, it moves the downloaded file to the snapshot directory and - * returns the snapshot index. - * - * @return the index of the downloaded snapshot, or {@link RaftLog#INVALID_LOG_INDEX} - * if no snapshot is installed. 
- */ - public long maybeCopySnapshotFromFollower() { - if (mDownloadState.get() == DownloadState.DOWNLOADED) { - return installDownloadedSnapshot(); - } - SAMPLING_LOG.info("Call copy snapshot from follower in state {}", mDownloadState.get()); - if (mDownloadState.get() == DownloadState.IDLE) { - CompletableFuture.runAsync(this::requestSnapshotFromFollowers); - } - return RaftLog.INVALID_LOG_INDEX; - } - - /** - * Receives a snapshot from follower. - * - * @param responseStreamObserver the response stream observer - * @return the request stream observer - */ - public StreamObserver receiveSnapshotFromFollower( - StreamObserver responseStreamObserver) { - String followerIp = ClientContextServerInjector.getIpAddress(); - LOG.info("Received upload snapshot request from follower {}", followerIp); - - SnapshotDownloader observer = - SnapshotDownloader.forLeader(mStorage, responseStreamObserver, - followerIp); - if (!transitionState(DownloadState.REQUEST_DATA, DownloadState.STREAM_DATA)) { - responseStreamObserver.onCompleted(); - return observer; - } - observer.getFuture() - .thenApply(termIndex -> { - try (LockResource ignored = new LockResource(mRequestDataLock)) { - mDownloadedSnapshot = observer.getSnapshotToInstall(); - transitionState(DownloadState.STREAM_DATA, DownloadState.DOWNLOADED); - // Cancel any pending data requests since the download was successful - mRequestDataFuture.cancel(true); - mRequestDataCondition.signalAll(); - return termIndex; - } - }).exceptionally(e -> { - try (LockResource ignored = new LockResource(mRequestDataLock)) { - LOG.error("Unexpected exception downloading snapshot from follower {}.", followerIp, e); - // this allows the leading master to request other followers for their snapshots. It - // previously collected information about other snapshots in requestInfo(). 
If no other - // snapshots are available requestData() will return false and mDownloadState will be - // IDLE - transitionState(DownloadState.STREAM_DATA, DownloadState.REQUEST_DATA); - // Notify the request data tasks to start a request with a new candidate - mRequestDataCondition.signalAll(); - return null; - } - }); - return observer; - } - - /** - * Handles snapshot requests. - * - * @param queryRequest the query request - * @return the response message, or null if the request is not handled - * @throws IOException if any error occurred while handling the request - */ - public Message handleRequest(JournalQueryRequest queryRequest) throws IOException { - if (queryRequest.hasSnapshotInfoRequest()) { - SnapshotMetadata requestSnapshot = queryRequest.getSnapshotInfoRequest().getSnapshotInfo(); - Instant start = Instant.now(); - SnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - synchronized (this) { - // We may need to wait for a valid snapshot to be ready - while ((latestSnapshot == null - || (queryRequest.getSnapshotInfoRequest().hasSnapshotInfo() - && (requestSnapshot.getSnapshotTerm() > latestSnapshot.getTerm() - || (requestSnapshot.getSnapshotTerm() == latestSnapshot.getTerm() - && requestSnapshot.getSnapshotIndex() >= latestSnapshot.getIndex())))) - && Duration.between(start, Instant.now()).toMillis() < SNAPSHOT_INFO_TIMEOUT_MS) { - LOG.info("Received snapshot info request from leader - {}, but do not have a " - + "snapshot ready - {}", requestSnapshot, latestSnapshot); - try { - wait(SNAPSHOT_DATA_TIMEOUT_MS - Long.min(SNAPSHOT_DATA_TIMEOUT_MS, - Math.abs(Duration.between(start, Instant.now()).toMillis()))); - } catch (InterruptedException e) { - LOG.debug("Interrupted while waiting for snapshot", e); - break; - } - latestSnapshot = mStorage.getLatestSnapshot(); - } - } - if (latestSnapshot == null) { - LOG.debug("No snapshot to send"); - return toMessage(GetSnapshotInfoResponse.getDefaultInstance()); - } - JournalQueryResponse response = 
JournalQueryResponse.newBuilder() - .setSnapshotInfoResponse(GetSnapshotInfoResponse.newBuilder().setLatest( - toSnapshotMetadata(latestSnapshot.getTermIndex()))) - .build(); - LOG.info("Sent snapshot info response to leader {}", response); - return toMessage(response); - } - if (queryRequest.hasSnapshotRequest()) { - LOG.info("Start sending snapshot to leader"); - sendSnapshotToLeader(); - return Message.EMPTY; - } - return null; - } - - /** - * Sends a snapshot to a follower. - * - * @param responseObserver the response stream observer - * @return the request stream observer - */ - public StreamObserver sendSnapshotToFollower( - StreamObserver responseObserver) { - SnapshotInfo snapshot = mStorage.getLatestSnapshot(); - LOG.debug("Received snapshot download request from {}", - ClientContextServerInjector.getIpAddress()); - SnapshotUploader requestStreamObserver = - SnapshotUploader.forLeader(mStorage, snapshot, responseObserver); - if (snapshot == null) { - responseObserver.onError(Status.NOT_FOUND - .withDescription("Cannot find a valid snapshot to download.") - .asException()); - return requestStreamObserver; - } - responseObserver.onNext(DownloadSnapshotPResponse.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - return requestStreamObserver; - } - - private static Message toMessage(MessageLite value) { - return Message.valueOf( - UnsafeByteOperations.unsafeWrap(value.toByteString().asReadOnlyByteBuffer())); - } - - private SnapshotMetadata toSnapshotMetadata(TermIndex value) { - return value == null ? 
null : - SnapshotMetadata.newBuilder() - .setSnapshotTerm(value.getTerm()) - .setSnapshotIndex(value.getIndex()) - .build(); - } - - private boolean transitionState(DownloadState expected, DownloadState update) { - if (!mDownloadState.compareAndSet(expected, update)) { - LOG.warn("Failed to transition from {} to {}: current state is {}", - expected, update, mDownloadState.get()); - return false; - } - LOG.debug("Successfully transitioned from {} to {}", expected, update); - return true; - } - - /** - * Installs a downloaded snapshot in the journal snapshot directory. - * - * @return the index of the installed snapshot - */ - private long installDownloadedSnapshot() { - LOG.info("Call install downloaded snapshot"); - if (!transitionState(DownloadState.DOWNLOADED, DownloadState.INSTALLING)) { - return RaftLog.INVALID_LOG_INDEX; - } - File tempFile = null; - try (Timer.Context ctx = MetricsSystem - .timer(MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_INSTALL_TIMER.getName()).time()) { - SnapshotInfo snapshot = mDownloadedSnapshot; - if (snapshot == null) { - throw new IllegalStateException("Snapshot is not completed"); - } - FileInfo fileInfo = snapshot.getFiles().get(0); - tempFile = fileInfo.getPath().toFile(); - if (!tempFile.exists()) { - throw new FileNotFoundException(String.format("Snapshot file %s is not found", tempFile)); - } - SnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - TermIndex lastInstalled = latestSnapshot == null ? 
null : latestSnapshot.getTermIndex(); - TermIndex downloaded = snapshot.getTermIndex(); - if (lastInstalled != null && downloaded.compareTo(lastInstalled) < 0) { - throw new AbortedException( - String.format("Snapshot to be installed %s is older than current snapshot %s", - downloaded, lastInstalled)); - } - final File snapshotFile = mStorage.getSnapshotFile( - downloaded.getTerm(), downloaded.getIndex()); - LOG.debug("Moving temp snapshot {} to file {}", tempFile, snapshotFile); - MD5FileUtil.saveMD5File(snapshotFile, fileInfo.getFileDigest()); - if (!tempFile.renameTo(snapshotFile)) { - throw new IOException(String.format("Failed to rename %s to %s", tempFile, snapshotFile)); - } - synchronized (this) { - mStorage.loadLatestSnapshot(); - notifyAll(); - } - LOG.info("Completed storing snapshot at {} to file {} with size {}", downloaded, - snapshotFile, FormatUtils.getSizeFromBytes(snapshotFile.length())); - return downloaded.getIndex(); - } catch (Exception e) { - LOG.error("Failed to install snapshot", e); - if (tempFile != null) { - tempFile.delete(); - } - return RaftLog.INVALID_LOG_INDEX; - } finally { - transitionState(DownloadState.INSTALLING, DownloadState.IDLE); - } - } - - /** - * Finds a follower with the latest snapshot and sends a request to download it. - */ - private void requestSnapshotFromFollowers() { - if (mDownloadState.get() == DownloadState.IDLE) { - if (!transitionState(DownloadState.IDLE, DownloadState.REQUEST_INFO)) { - return; - } - // we want fresh info not polluted by older requests. This ensures that requestData() requests - // from at most # followers before requesting new info. Otherwise, the candidate queue might - // grow indefinitely. 
- mSnapshotCandidates.clear(); - requestInfo(); - transitionState(DownloadState.REQUEST_INFO, DownloadState.REQUEST_DATA); - mRequestDataFuture = mRequestDataExecutor.submit(this::requestData, null); - } - } - - private void requestInfo() { - Preconditions.checkState(mDownloadState.get() == DownloadState.REQUEST_INFO); - try { - LOG.info("Call request snapshot info from followers"); - SingleFileSnapshotInfo latestSnapshot = mStorage.getLatestSnapshot(); - SnapshotMetadata snapshotMetadata = latestSnapshot == null ? null : - SnapshotMetadata.newBuilder() - .setSnapshotTerm(latestSnapshot.getTerm()) - .setSnapshotIndex(latestSnapshot.getIndex()) - .build(); - // build SnapshotInfoRequests - GetSnapshotInfoRequest infoRequest; - if (snapshotMetadata == null) { - infoRequest = GetSnapshotInfoRequest.getDefaultInstance(); - } else { - infoRequest = GetSnapshotInfoRequest.newBuilder() - .setSnapshotInfo(snapshotMetadata).build(); - } - Map> jobs = mJournalSystem - .getQuorumServerInfoList() - .stream() - .filter(server -> server.getServerState() == QuorumServerState.AVAILABLE) - .map(server -> RaftJournalUtils.getPeerId( - server.getServerAddress().getHost(), - server.getServerAddress().getRpcPort())) - .filter(peerId -> !peerId.equals(mJournalSystem.getLocalPeerId())) - .collect(Collectors.toMap(Function.identity(), - peerId -> mJournalSystem.sendMessageAsync(peerId, toMessage(JournalQueryRequest - .newBuilder() - .setSnapshotInfoRequest(infoRequest) - .build()), SNAPSHOT_INFO_TIMEOUT_MS))); - // query all secondary masters for information about their latest snapshot - for (Map.Entry> job : jobs.entrySet()) { - RaftPeerId peerId = job.getKey(); - try { - RaftClientReply reply = job.getValue().get(); - if (reply.getException() != null) { - throw reply.getException(); - } - JournalQueryResponse response = JournalQueryResponse.parseFrom( - reply.getMessage().getContent().asReadOnlyByteBuffer()); - if (!response.hasSnapshotInfoResponse()) { - throw new IOException("Invalid 
response for GetSnapshotInfoRequest " + response); - } - SnapshotMetadata latest = response.getSnapshotInfoResponse().getLatest(); - LOG.info("Received snapshot info from follower {} - {}, my current snapshot is {}", - peerId, latest, snapshotMetadata); - if (snapshotMetadata == null - || (latest.getSnapshotTerm() >= snapshotMetadata.getSnapshotTerm()) - && latest.getSnapshotIndex() > snapshotMetadata.getSnapshotIndex()) { - mSnapshotCandidates.add(new Pair<>(latest, peerId)); - } - } catch (Exception e) { - LOG.warn("Error while requesting snapshot info from {}: {}", peerId, e.toString()); - } - } - } catch (Exception e) { - LogUtils.warnWithException(LOG, "Failed to request snapshot info from followers", e); - } - } - - private void requestData() { - Preconditions.checkState(mDownloadState.get() == DownloadState.REQUEST_DATA); - // request snapshots from the most recent to the least recent - try { - while (!mSnapshotCandidates.isEmpty() && mDownloadState.get() == DownloadState.REQUEST_DATA) { - Pair candidate = mSnapshotCandidates.poll(); - SnapshotMetadata metadata = Objects.requireNonNull(candidate).getFirst(); - RaftPeerId peerId = candidate.getSecond(); - LOG.info("Request data from follower {} for snapshot (t: {}, i: {})", - peerId, metadata.getSnapshotTerm(), metadata.getSnapshotIndex()); - try { - RaftClientReply reply = mJournalSystem.sendMessageAsync(peerId, - toMessage(JournalQueryRequest.newBuilder() - .setSnapshotRequest(GetSnapshotRequest.getDefaultInstance()).build())) - .get(); - if (reply.getException() != null) { - throw reply.getException(); - } - // Wait a timeout before trying the next follower, or until we are awoken - try (LockResource ignored = new LockResource(mRequestDataLock)) { - do { - mRequestDataCondition.await(SNAPSHOT_DATA_TIMEOUT_MS, TimeUnit.MILLISECONDS); - } while (mDownloadState.get() != DownloadState.REQUEST_DATA); - } - } catch (InterruptedException | CancellationException ignored) { - // We are usually interrupted when a 
snapshot transfer is complete, - // so we can just return without trying a new candidate. - // It is fine even if we are interrupted in other cases as - // a new request info will be initiated by the next takeSnapshot() call. - return; - } catch (Exception e) { - LOG.warn("Failed to request snapshot data from {}: {}", peerId, e); - } - } - } finally { - // Ensure that we return to the IDLE state in case the REQUEST_DATA operations - // were not successful, for example if we were interrupted for some reason - // other than a successful download. - if (mDownloadState.get() == DownloadState.REQUEST_DATA) { - transitionState(DownloadState.REQUEST_DATA, DownloadState.IDLE); - } - } - } - - @VisibleForTesting - synchronized RaftJournalServiceClient createJournalServiceClient() - throws AlluxioStatusException { - RaftJournalServiceClient client = new RaftJournalServiceClient(MasterClientContext - .newBuilder(ClientContext.create(Configuration.global())).build()); - client.connect(); - return client; - } -} diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java b/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java deleted file mode 100644 index 727f6a288b7c..000000000000 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/SnapshotUploader.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.journal.raft; - -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.exception.status.InvalidArgumentException; -import alluxio.grpc.DownloadSnapshotPRequest; -import alluxio.grpc.DownloadSnapshotPResponse; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; - -import com.google.protobuf.UnsafeByteOperations; -import io.grpc.stub.ClientCallStreamObserver; -import io.grpc.stub.ClientResponseObserver; -import io.grpc.stub.StreamObserver; -import org.apache.commons.io.IOUtils; -import org.apache.ratis.statemachine.SnapshotInfo; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.CompletableFuture; -import java.util.function.Function; - -/** - * A stream observer for uploading a snapshot. - * - * @param the message type to send - * @param the message type to receive - */ -public class SnapshotUploader - implements StreamObserver, ClientResponseObserver { - private static final Logger LOG = LoggerFactory.getLogger(SnapshotUploader.class); - private static final int SNAPSHOT_CHUNK_SIZE = (int) Configuration.getBytes( - PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE); - - private final Function mDataMessageBuilder; - private final Function mOffsetGetter; - private final File mSnapshotFile; - private final long mLength; - private final SnapshotInfo mSnapshotInfo; - private long mOffset = 0; - private StreamObserver mStream; - private final CompletableFuture mCompletionFuture = new CompletableFuture<>(); - - /** - * Builds a stream for leader to upload a snapshot. 
- * - * @param storage the snapshot storage - * @param snapshot the snapshot to upload - * @param stream the download stream - * @return the upload stream for leader - */ - public static SnapshotUploader forLeader( - SimpleStateMachineStorage storage, SnapshotInfo snapshot, - StreamObserver stream) { - return new SnapshotUploader<>(storage, snapshot, stream, - data -> DownloadSnapshotPResponse.getDefaultInstance().toBuilder().setData(data).build(), - DownloadSnapshotPRequest::getOffsetReceived); - } - - /** - * Builds a stream for follower to upload a snapshot. - * - * @param storage the snapshot storage - * @param snapshot the snapshot to upload - * @return the upload stream for follower - */ - public static SnapshotUploader forFollower( - SimpleStateMachineStorage storage, SnapshotInfo snapshot) { - return new SnapshotUploader<>(storage, snapshot, null, - data -> UploadSnapshotPRequest.getDefaultInstance().toBuilder().setData(data).build(), - UploadSnapshotPResponse::getOffsetReceived); - } - - private SnapshotUploader(SimpleStateMachineStorage storage, SnapshotInfo snapshot, - StreamObserver stream, - Function buildFunc, Function offsetGetter) { - mSnapshotInfo = snapshot; - mDataMessageBuilder = buildFunc; - mOffsetGetter = offsetGetter; - mSnapshotFile = storage.getSnapshotFile(snapshot.getTerm(), snapshot.getIndex()); - mLength = mSnapshotFile.length(); - mStream = stream; - } - - @Override - public void onNext(R value) { - try { - onNextInternal(value); - } catch (Exception e) { - LOG.error("Error occurred while sending snapshot", e); - mStream.onError(e); - } - } - - private void onNextInternal(R value) throws IOException { - LOG.debug("Received request {}", value); - if (mStream == null) { - throw new IllegalStateException("No request stream assigned"); - } - if (!mSnapshotFile.exists()) { - throw new FileNotFoundException( - String.format("Snapshot file %s does not exist", mSnapshotFile.getPath())); - } - long offsetReceived = mOffsetGetter.apply(value); 
- // TODO(feng): implement better flow control - if (mOffset != offsetReceived) { - throw new InvalidArgumentException( - String.format("Received mismatched offset: %d. Expect %d", offsetReceived, mOffset)); - } - LOG.debug("Streaming data at {}", mOffset); - try (InputStream is = new FileInputStream(mSnapshotFile)) { - is.skip(mOffset); - boolean eof = false; - int chunkSize = SNAPSHOT_CHUNK_SIZE; - long available = mLength - mOffset; - if (available <= SNAPSHOT_CHUNK_SIZE) { - eof = true; - chunkSize = (int) available; - } - byte[] buffer = new byte[chunkSize]; - IOUtils.readFully(is, buffer); - LOG.debug("Read {} bytes from file {}", chunkSize, mSnapshotFile); - mStream.onNext(mDataMessageBuilder.apply(SnapshotData.newBuilder() - .setOffset(mOffset) - .setEof(eof) - .setChunk(UnsafeByteOperations.unsafeWrap(buffer)) - .setSnapshotTerm(mSnapshotInfo.getTerm()) - .setSnapshotIndex(mSnapshotInfo.getIndex()) - .build())); - mOffset += chunkSize; - LOG.debug("Uploaded total {} bytes of file {}", mOffset, mSnapshotFile); - } - } - - @Override - public void onError(Throwable t) { - LOG.error("Error sending snapshot {} at {}", mSnapshotFile, mOffset, t); - mStream.onError(t); - mCompletionFuture.completeExceptionally(t); - } - - @Override - public void onCompleted() { - LOG.debug("Received onComplete for {}", mSnapshotInfo); - mStream.onCompleted(); - mCompletionFuture.complete(mSnapshotInfo); - } - - /** - * @return a future used to propagate completion status to {@link SnapshotReplicationManager} - */ - public CompletableFuture getCompletionFuture() { - return mCompletionFuture; - } - - @Override - public void beforeStart(ClientCallStreamObserver requestStream) { - mStream = requestStream; - } -} diff --git a/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java b/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java index bc211e90bf69..50a42359e9d3 100644 --- 
a/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/master/journal/JournalUtilsTest.java @@ -12,22 +12,29 @@ package alluxio.master.journal; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.journal.checkpoint.CheckpointOutputStream; import alluxio.master.journal.checkpoint.CheckpointType; +import alluxio.master.journal.checkpoint.Checkpointed; import alluxio.proto.journal.File.AddMountPointEntry; import alluxio.proto.journal.Journal.JournalEntry; import alluxio.resource.CloseableIterator; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -39,6 +46,9 @@ public final class JournalUtilsTest { @Rule public ExpectedException mThrown = ExpectedException.none(); + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + @Test public void checkpointAndRestore() throws IOException, InterruptedException { Journaled journaled = new TestJournaled(0); @@ -75,6 +85,92 @@ public void checkpointAndRestoreComponents() throws Exception { components.forEach(c -> assertEquals(1, c.getNumEntriesProcessed())); } + @Test + public void noEntryTest() throws IOException, InterruptedException { + testEntries(0); + } + + @Test + public void oneEntryTest() throws IOException, InterruptedException { + testEntries(1); + } + + @Test + public void multiEntryTest() throws IOException, InterruptedException { + testEntries(5); + } + + private void testEntries(int numEntries) 
throws IOException, InterruptedException { + TestMultiEntryJournaled journaled = createJournaled(numEntries, 0L); + ArrayList copy = new ArrayList<>(journaled.mProcessedEntries); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + JournalUtils.writeJournalEntryCheckpoint(outputStream, journaled); + } + journaled.resetState(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + JournalUtils.restoreJournalEntryCheckpoint(inputStream, journaled); + } + Assert.assertEquals(copy, journaled.mProcessedEntries); + } + + @Test + public void testCompoundNone() throws IOException, InterruptedException { + testCompound(0); + } + + @Test + public void testCompoundOne() throws IOException, InterruptedException { + testCompound(1); + } + + @Test + public void testCompoundMulti() throws IOException, InterruptedException { + testCompound(5); + } + + private void testCompound(int numElements) throws IOException, InterruptedException { + List checkpointed = new ArrayList<>(numElements); + int numEntries = 5; + long sequenceNumber = 0; + for (int i = 0; i < numElements; i++) { + if (i % 2 == 0) { + checkpointed.add(createJournaled(numEntries, sequenceNumber)); + } else { + checkpointed.add(new TestCheckpointed(numEntries, sequenceNumber)); + } + sequenceNumber += numEntries; + } + + ArrayList copy = new ArrayList<>(checkpointed); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + JournalUtils.writeToCheckpoint(outputStream, checkpointed); + } + for (Checkpointed c : checkpointed) { + if (c instanceof Journaled) { + ((Journaled) c).resetState(); + } else if (c instanceof TestCheckpointed) { + ((TestCheckpointed) c).clear(); + } + } + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + JournalUtils.restoreFromCheckpoint(inputStream, checkpointed); + } + 
assertEquals(copy, checkpointed); + } + + private TestMultiEntryJournaled createJournaled(int numEntries, long baseSequenceNumber) { + TestMultiEntryJournaled journaled = new TestMultiEntryJournaled(); + for (int i = 0; i < numEntries; i++) { + journaled.processJournalEntry( + JournalEntry.newBuilder().setSequenceNumber(baseSequenceNumber + i).build()); + } + return journaled; + } + private static class TestJournaled implements Journaled { private final CheckpointName mName; private int mNumEntriesProcessed; @@ -108,4 +204,101 @@ public CheckpointName getCheckpointName() { return mName; } } + + private static class TestMultiEntryJournaled implements Journaled { + private static int sIndex = 0; + private final CheckpointName mName; + private final List mProcessedEntries = new ArrayList<>(); + + TestMultiEntryJournaled() { + mName = CheckpointName.values()[sIndex]; + sIndex = (sIndex + 1); + assertTrue("Cannot create too many Journaled instances", + sIndex <= CheckpointName.values().length); + } + + @Override + public CloseableIterator getJournalEntryIterator() { + return CloseableIterator.noopCloseable(mProcessedEntries.iterator()); + } + + @Override + public boolean processJournalEntry(JournalEntry entry) { + return mProcessedEntries.add(entry); + } + + @Override + public void resetState() { + mProcessedEntries.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return mName; + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof TestMultiEntryJournaled + && mProcessedEntries.equals(((TestMultiEntryJournaled) obj).mProcessedEntries); + } + } + + private static class TestCheckpointed implements Checkpointed { + private static long sLong = 0L; + private final CheckpointName mName; + private final int mSize; + private final List mState = new ArrayList<>(); + + TestCheckpointed(int numLongs, long baseLong) { + mName = new 
TestMultiEntryJournaled().getCheckpointName(); + mSize = numLongs; + for (int i = 0; i < mSize; i++) { + mState.add(baseLong + i); + } + } + + public void clear() { + mState.clear(); + } + + @Override + public CheckpointName getCheckpointName() { + return mName; + } + + @Override + public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { + CheckpointOutputStream outputStream = new CheckpointOutputStream(output, + CheckpointType.LONGS); + for (Long l : mState) { + outputStream.writeLong(l); + } + } + + @Override + public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + assertEquals(CheckpointType.LONGS, input.getType()); + for (int i = 0; i < mSize; i++) { + long l = input.readLong(); + mState.add(l); + } + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof TestCheckpointed + && mState.equals(((TestCheckpointed) obj).mState); + } + } } diff --git a/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java b/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java new file mode 100644 index 000000000000..4ee717313b6e --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/checkpoint/CheckpointStreamTest.java @@ -0,0 +1,79 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.checkpoint; + +import net.bytebuddy.utility.RandomString; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.util.MD5FileUtil; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class CheckpointStreamTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(CheckpointType.values()); + } + + @Parameterized.Parameter + public CheckpointType mType; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void regularStreamTest() throws IOException { + File file = mFolder.newFile(); + byte[] contents = RandomString.make().getBytes(); + try (CheckpointOutputStream outputStream = + new CheckpointOutputStream(Files.newOutputStream(file.toPath()), mType)) { + outputStream.write(contents); + } + byte[] retrieved = new byte[contents.length]; + try (CheckpointInputStream s = new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + Assert.assertEquals(mType, s.getType()); + s.read(retrieved); + } + Assert.assertArrayEquals(contents, retrieved); + } + + @Test + public void optimizedStreamTest() throws IOException { + File file = mFolder.newFile(); + MessageDigest md5Out = MD5Hash.getDigester(); + byte[] contents = RandomString.make().getBytes(); + try (CheckpointOutputStream outputStream = + new CheckpointOutputStream(new OptimizedCheckpointOutputStream(file, md5Out), mType)) { + outputStream.write(contents); + } + MD5FileUtil.saveMD5File(file, new MD5Hash(md5Out.digest())); + MessageDigest md5In = MD5Hash.getDigester(); + byte[] retrieved = new byte[contents.length]; + try (CheckpointInputStream s = new 
OptimizedCheckpointInputStream(file, md5In)) { + Assert.assertEquals(mType, s.getType()); + s.read(retrieved); + } + MD5FileUtil.verifySavedMD5(file, new MD5Hash(md5In.digest())); + Assert.assertArrayEquals(contents, retrieved); + } +} diff --git a/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java b/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java new file mode 100644 index 000000000000..e25ad967cf47 --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/raft/RaftSnapshotManagerTest.java @@ -0,0 +1,274 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.raft; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.GrpcServer; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; +import alluxio.grpc.ServiceType; + +import net.bytebuddy.utility.RandomString; +import org.apache.commons.io.FileUtils; +import org.apache.ratis.io.MD5Hash; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.server.storage.RaftStorageImpl; +import org.apache.ratis.server.storage.StorageImplUtils; +import org.apache.ratis.statemachine.StateMachineStorage; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.util.MD5FileUtil; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +public class RaftSnapshotManagerTest { + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private final List mGrpcServers = new ArrayList<>(); + private final List mSmStorages = new ArrayList<>(); + private final List mManagers = new ArrayList<>(); + + @Before + public void before() throws IOException { + Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT, "10ms"); + // create Raft Storages and grpc servers for all masters + // no need to create full master processes + for (int i = 0; i < 3; i++) { + // create the state machine storage and initalize it 
using the raft storage + SnapshotDirStateMachineStorage smStorage = createStateMachineStorage(mFolder); + mSmStorages.add(smStorage); + RaftJournalServiceHandler handler = new RaftJournalServiceHandler(smStorage); + // create and start a grpc server for each on a random available port + GrpcServer server = createGrpcServer(handler); + server.start(); + mGrpcServers.add(server); + } + // create snapshot managers based on the ports being used by the servers + String hostAddress = InetAddress.getLocalHost().getHostAddress(); + String rpcAddresses = mGrpcServers.stream() + .map(server -> String.format("%s:%d", hostAddress, server.getBindPort())) + .collect(Collectors.joining(",")); + Configuration.set(PropertyKey.MASTER_RPC_ADDRESSES, rpcAddresses); + // create SnapshotDownloaders after the fact: this is because the downloaders cache their + // grpc clients to reuse them efficiently. They create the clients based on the configured + // rpc addresses, excluding their own. + for (int i = 0; i < mGrpcServers.size(); i++) { + Configuration.set(PropertyKey.MASTER_RPC_PORT, mGrpcServers.get(i).getBindPort()); + mManagers.add(new RaftSnapshotManager(mSmStorages.get(i), + Executors.newSingleThreadExecutor())); + } + } + + @After + public void after() throws IOException { + mGrpcServers.forEach(GrpcServer::shutdown); + mGrpcServers.forEach(GrpcServer::awaitTermination); + } + + @Test + public void noneAvailable() { + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(RaftLog.INVALID_LOG_INDEX, l); + } + + @Test + public void simple() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = 
mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + @Test + public void oneUnavailable() throws IOException { + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + @Test + public void downloadHigherOne() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + createSampleSnapshot(mSmStorages.get(2), 1, 100); + mSmStorages.get(2).loadLatestSnapshot(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(100, l); + File snapshotDir2 = mSmStorages.get(2).getSnapshotDir(); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir2)); + Assert.assertFalse(directoriesEqual(snapshotDir1, snapshotDir0)); + Assert.assertFalse(directoriesEqual(snapshotDir1, snapshotDir2)); + } + + @Test + public void higherOneUnavailable() throws IOException { + createSampleSnapshot(mSmStorages.get(1), 1, 10); + createSampleSnapshot(mSmStorages.get(2), 1, 100); + mSmStorages.get(1).loadLatestSnapshot(); + mSmStorages.get(2).loadLatestSnapshot(); + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir2 = mSmStorages.get(2).getSnapshotDir(); + File 
snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + Assert.assertFalse(directoriesEqual(snapshotDir2, snapshotDir0)); + Assert.assertFalse(directoriesEqual(snapshotDir2, snapshotDir1)); + } + + @Test + public void successThenFailureThenSuccess() throws IOException { + // eliminate one of the two servers + mGrpcServers.get(2).shutdown(); + mGrpcServers.get(2).awaitTermination(); + + createSampleSnapshot(mSmStorages.get(1), 1, 10); + mSmStorages.get(1).loadLatestSnapshot(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + long l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(10, l); + File snapshotDir1 = mSmStorages.get(1).getSnapshotDir(); + File snapshotDir0 = mSmStorages.get(0).getSnapshotDir(); + Assert.assertTrue(directoriesEqual(snapshotDir0, snapshotDir1)); + + createSampleSnapshot(mSmStorages.get(1), 2, 100); + mSmStorages.get(1).loadLatestSnapshot(); + int bindPort = mGrpcServers.get(1).getBindPort(); + mGrpcServers.get(1).shutdown(); + mGrpcServers.get(1).awaitTermination(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(-1, l); // failure expected + + // recreate grpc server on the same port + mGrpcServers.add(1, + createGrpcServer(new RaftJournalServiceHandler(mSmStorages.get(1)), bindPort)); + mGrpcServers.get(1).start(); + createSampleSnapshot(mSmStorages.get(1), 3, 1_000); + mSmStorages.get(1).loadLatestSnapshot(); + mManagers.get(0).downloadSnapshotFromOtherMasters(); + l = mManagers.get(0).waitForAttemptToComplete(); + Assert.assertEquals(1_000, l); + // server 1 has more snapshots than server 0 + Assert.assertFalse(directoriesEqual(snapshotDir0, snapshotDir1)); + } + + public static SnapshotDirStateMachineStorage createStateMachineStorage(TemporaryFolder folder) + throws IOException { + RaftStorageImpl raftStorage 
= StorageImplUtils.newRaftStorage(folder.newFolder(), + RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, RaftStorage.StartupOption.RECOVER, + RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); + raftStorage.initialize(); + SnapshotDirStateMachineStorage smStorage = new SnapshotDirStateMachineStorage(); + smStorage.init(raftStorage); + return smStorage; + } + + public static GrpcServer createGrpcServer(RaftJournalServiceHandler handler) throws IOException { + return createGrpcServer(handler, 0); + } + + public static GrpcServer createGrpcServer(RaftJournalServiceHandler handler, int port) + throws IOException { + try (ServerSocket socket = new ServerSocket(port)) { + InetSocketAddress address = new InetSocketAddress(socket.getLocalPort()); + return GrpcServerBuilder.forAddress( + GrpcServerAddress.create(address.getHostName(), address), + Configuration.global()) + .addService(ServiceType.RAFT_JOURNAL_SERVICE, new GrpcService(handler)) + .build(); + } + } + + public static void createSampleSnapshot(StateMachineStorage smStorage, long term, long index) + throws IOException { + String snapshotDirName = SimpleStateMachineStorage.getSnapshotFileName(term, index); + File dir = new File(smStorage.getSnapshotDir(), snapshotDirName); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException(String.format("Unable to create directory %s", dir)); + } + for (int i = 0; i < 10; i++) { + String s = "dummy-file-" + i; + File file = new File(dir, s); + try (FileOutputStream outputStream = new FileOutputStream(file)) { + outputStream.write(RandomString.make().getBytes()); + } + MD5Hash md5Hash = MD5FileUtil.computeMd5ForFile(file); + MD5FileUtil.saveMD5File(file, md5Hash); + } + } + + public static boolean directoriesEqual(File dir1, File dir2) throws IOException { + if (!dir1.getName().equals(dir2.getName())) { + return false; + } + List files1 = new ArrayList<>(FileUtils.listFiles(dir1, null, true)); + List files2 = new ArrayList<>(FileUtils.listFiles(dir2, 
null, true)); + if (files1.size() != files2.size()) { + return false; + } + for (File file1 : files1) { + Path relativize1 = dir1.toPath().relativize(file1.toPath()); + Optional optionalFile = files2.stream() + .filter(file -> dir2.toPath().relativize(file.toPath()).equals(relativize1)) + .findFirst(); + if (!optionalFile.isPresent() || !FileUtils.contentEquals(file1, optionalFile.get())) { + return false; + } + } + return true; + } +} diff --git a/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java b/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java new file mode 100644 index 000000000000..c60d06da378b --- /dev/null +++ b/core/server/common/src/test/java/alluxio/master/journal/raft/SnapshotDirStateMachineStorageTest.java @@ -0,0 +1,167 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.journal.raft; + +import static alluxio.master.journal.raft.RaftSnapshotManagerTest.createSampleSnapshot; +import static alluxio.master.journal.raft.RaftSnapshotManagerTest.createStateMachineStorage; + +import net.bytebuddy.utility.RandomString; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.SnapshotRetentionPolicy; +import org.apache.ratis.statemachine.impl.FileListSnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Stream; + +public class SnapshotDirStateMachineStorageTest { + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + final SnapshotRetentionPolicy mRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 1; // keep only 1 snapshot + } + }; + SnapshotDirStateMachineStorage mStateMachineStorage; + + @Before + public void before() throws IOException { + mStateMachineStorage = createStateMachineStorage(mFolder); + } + + @Test + public void noSnapshot() { + Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + } + + @Test + public void onlyUpdateOnLoad() throws IOException { + Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + createSampleSnapshot(mStateMachineStorage, 1, 10); + // still null until new information is loaded + Assert.assertNull(mStateMachineStorage.getLatestSnapshot()); + } + + @Test + public void singleSnapshot() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + 
mStateMachineStorage.loadLatestSnapshot(); + SnapshotInfo latestSnapshot = mStateMachineStorage.getLatestSnapshot(); + Assert.assertTrue(latestSnapshot instanceof FileListSnapshotInfo); + Assert.assertEquals(TermIndex.valueOf(1, 10), latestSnapshot.getTermIndex()); + } + + @Test + public void newerIndex() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 10), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + createSampleSnapshot(mStateMachineStorage, 1, 15); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 15), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void newerTerm() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(1, 10), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + createSampleSnapshot(mStateMachineStorage, 2, 5); + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(2, 5), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void noDeletionUnlessSignaled() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + createSampleSnapshot(mStateMachineStorage, 2, 10); + createSampleSnapshot(mStateMachineStorage, 3, 100); + + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // no deletion unless signaled + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(3, s.count()); + } + } + + @Test + public void noopDeleteIfEmpty() throws IOException { + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + try (Stream s = 
Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(0, s.count()); + } + } + + @Test + public void noopDeleteIfOneOnly() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 10); + + mStateMachineStorage.loadLatestSnapshot(); + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // signaled, but the single latest snapshot is never deleted + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(1, s.count()); + } + } + + @Test + public void deleteMultiple() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + createSampleSnapshot(mStateMachineStorage, 2, 10); + createSampleSnapshot(mStateMachineStorage, 3, 100); + + mStateMachineStorage.signalNewSnapshot(); + mStateMachineStorage.cleanupOldSnapshots(mRetentionPolicy); + // after signaling, only the most recent snapshot is retained + try (Stream s = Files.list(mStateMachineStorage.getSnapshotDir().toPath())) { + Assert.assertEquals(1, s.count()); + } + mStateMachineStorage.loadLatestSnapshot(); + Assert.assertEquals(TermIndex.valueOf(3, 100), + mStateMachineStorage.getLatestSnapshot().getTermIndex()); + } + + @Test + public void backwardsCompatible() throws IOException { + createSampleSnapshot(mStateMachineStorage, 1, 1); + String snapshotFile = SimpleStateMachineStorage.getSnapshotFileName(2, 10); + try (FileOutputStream outputStream = + new FileOutputStream(new File(mStateMachineStorage.getSnapshotDir(), snapshotFile))) { + outputStream.write(RandomString.make().getBytes()); + } + mStateMachineStorage.loadLatestSnapshot(); + SnapshotInfo latestSnapshot = mStateMachineStorage.getLatestSnapshot(); + Assert.assertTrue(latestSnapshot instanceof SingleFileSnapshotInfo); + Assert.assertEquals(TermIndex.valueOf(2, 10), latestSnapshot.getTermIndex()); + } +} diff --git a/core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java 
b/core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java new file mode 100644 index 000000000000..bbc97f5c7ca2 --- /dev/null +++ b/core/server/common/src/test/java/alluxio/util/compression/DirectoryMarshallerTest.java @@ -0,0 +1,101 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util.compression; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class DirectoryMarshallerTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new NoCompressionMarshaller(), + new GzipMarshaller(), + new TarGzMarshaller()); + } + + @Parameterized.Parameter + public DirectoryMarshaller mMarshaller; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void emptyDir() throws Exception { + Path empty = mFolder.newFolder("emptyDir").toPath(); + + tarUntarTest(empty); + } + + @Test + public void oneFileDir() throws Exception { + Path dir = mFolder.newFolder("oneFileDir").toPath(); + Path file = dir.resolve("file"); + Files.write(file, "test content".getBytes()); + + tarUntarTest(dir); + } + + @Test + public void tenFileDir() throws Exception { + Path dir = 
mFolder.newFolder("tenFileDir").toPath(); + for (int i = 0; i < 10; i++) { + Path file = dir.resolve("file" + i); + Files.write(file, ("test content" + i).getBytes()); + } + + tarUntarTest(dir); + } + + @Test + public void emptySubDir() throws Exception { + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path subDir = dir.resolve("subDir"); + Files.createDirectory(subDir); + + tarUntarTest(dir); + } + + @Test + public void nested() throws Exception { + Path dir = mFolder.newFolder("emptySubDir").toPath(); + Path current = dir; + for (int i = 0; i < 10; i++) { + Path newDir = current.resolve("dir" + i); + Files.createDirectory(newDir); + current = newDir; + } + Path file = current.resolve("file"); + Files.write(file, "hello world".getBytes()); + + tarUntarTest(dir); + } + + private void tarUntarTest(Path path) throws Exception { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + mMarshaller.write(path, baos); + Path reconstructed = mFolder.newFolder("untarred").toPath(); + reconstructed.toFile().delete(); + mMarshaller.read(reconstructed, new ByteArrayInputStream(baos.toByteArray())); + FileUtil.assertDirectoriesEqual(path, reconstructed); + } +} diff --git a/core/server/common/src/test/java/alluxio/util/FileUtil.java b/core/server/common/src/test/java/alluxio/util/compression/FileUtil.java similarity index 98% rename from core/server/common/src/test/java/alluxio/util/FileUtil.java rename to core/server/common/src/test/java/alluxio/util/compression/FileUtil.java index 793c818db94c..937d638717e7 100644 --- a/core/server/common/src/test/java/alluxio/util/FileUtil.java +++ b/core/server/common/src/test/java/alluxio/util/compression/FileUtil.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.util; +package alluxio.util.compression; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; diff --git a/core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java b/core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java similarity index 99% rename from core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java rename to core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java index 017e4918c67f..754911a274cd 100644 --- a/core/server/common/src/test/java/alluxio/util/ParallelZipUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/util/compression/ParallelZipUtilsTest.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.util; +package alluxio.util.compression; import alluxio.util.io.FileUtils; diff --git a/core/server/common/src/test/java/alluxio/util/TarUtilsTest.java b/core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java similarity index 99% rename from core/server/common/src/test/java/alluxio/util/TarUtilsTest.java rename to core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java index ce5f3bc47c06..db95e5cb0bdd 100644 --- a/core/server/common/src/test/java/alluxio/util/TarUtilsTest.java +++ b/core/server/common/src/test/java/alluxio/util/compression/TarUtilsTest.java @@ -9,7 +9,7 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. 
*/ -package alluxio.util; +package alluxio.util.compression; import static org.mockito.ArgumentMatchers.any; diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java index ec01e0a8abe6..c387fc37eb28 100644 --- a/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioMasterProcess.java @@ -202,9 +202,9 @@ public boolean isInSafeMode() { public void start() throws Exception { LOG.info("Process starting."); mRunning = true; - mServices.forEach(SimpleService::start); mJournalSystem.start(); startMasterComponents(false); + mServices.forEach(SimpleService::start); // Perform the initial catchup before joining leader election, // to avoid potential delay if this master is selected as leader diff --git a/core/server/master/src/main/java/alluxio/master/MasterProcess.java b/core/server/master/src/main/java/alluxio/master/MasterProcess.java index 06e12e446fe8..0b4228a94065 100644 --- a/core/server/master/src/main/java/alluxio/master/MasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/MasterProcess.java @@ -212,9 +212,9 @@ public final InetSocketAddress getWebAddress() { /** * @return true if the system is the leader (serving the rpc server), false otherwise */ - public boolean isGrpcServing() { + public boolean isGrpcServingAsLeader() { return mServices.stream().anyMatch(service -> service instanceof RpcServerService - && ((RpcServerService) service).isServing()); + && ((RpcServerService) service).isServingLeader()); } /** @@ -238,8 +238,8 @@ public boolean isMetricSinkServing() { * @param timeoutMs how long to wait in milliseconds * @return whether the grpc server became ready before the specified timeout */ - public boolean waitForGrpcServerReady(int timeoutMs) { - return pollFor(this + " to start", this::isGrpcServing, timeoutMs); + public boolean 
waitForLeaderGrpcServerReady(int timeoutMs) { + return pollFor(this + " to start", this::isGrpcServingAsLeader, timeoutMs); } /** @@ -276,7 +276,7 @@ private boolean pollFor(String message, Supplier waitFor, int timeoutMs @Override public boolean waitForReady(int timeoutMs) { - return waitForGrpcServerReady(timeoutMs); + return waitForLeaderGrpcServerReady(timeoutMs); } /** diff --git a/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java b/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java index fb1c57b32920..be86c591d575 100644 --- a/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java +++ b/core/server/master/src/main/java/alluxio/master/backup/BackupWorkerRole.java @@ -13,6 +13,7 @@ import alluxio.AlluxioURI; import alluxio.ClientContext; +import alluxio.Constants; import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -33,6 +34,7 @@ import alluxio.master.transport.Listener; import alluxio.retry.ExponentialBackoffRetry; import alluxio.retry.RetryPolicy; +import alluxio.util.logging.SamplingLogger; import alluxio.util.network.NetworkAddressUtils; import alluxio.wire.BackupStatus; @@ -55,6 +57,7 @@ */ public class BackupWorkerRole extends AbstractBackupRole { private static final Logger LOG = LoggerFactory.getLogger(BackupWorkerRole.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 10L * Constants.SECOND_MS); // Constant timeout for journal transition before backup. 
private static final long BACKUP_ABORT_AFTER_TRANSITION_TIMEOUT_MS = 30000; @@ -370,6 +373,12 @@ private void establishConnectionToLeader() { .build().getMasterInquireClient(); leaderAddress = inquireClient.getPrimaryRpcAddress(); + InetSocketAddress localAddress = NetworkAddressUtils.getConnectAddress( + NetworkAddressUtils.ServiceType.MASTER_RPC, Configuration.global()); + if (leaderAddress.equals(localAddress)) { + SAMPLING_LOG.info("Currently being promoted to leader"); + continue; + } } catch (Throwable t) { LOG.warn("Failed to get backup-leader address. Error:{}. Attempt:{}", t, infiniteRetryPolicy.getAttemptCount()); diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index a852e43e300a..733dcb29fc3d 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -50,7 +50,9 @@ import alluxio.master.block.meta.WorkerMetaLockSection; import alluxio.master.block.meta.WorkerState; import alluxio.master.journal.JournalContext; +import alluxio.master.journal.SingleEntryJournaled; import alluxio.master.journal.checkpoint.CheckpointName; +import alluxio.master.journal.checkpoint.Checkpointed; import alluxio.master.metastore.BlockMetaStore; import alluxio.master.metastore.BlockMetaStore.Block; import alluxio.master.metrics.MetricsMaster; @@ -93,6 +95,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.net.UnknownHostException; import java.time.Clock; @@ -110,6 +113,7 @@ import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -364,7 +368,10 @@ 
public Map getServices() { @Override public Map getStandbyServices() { - return getServices(); + if (Configuration.getBoolean(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS)) { + return getServices(); + } + return Collections.emptyMap(); } @Override @@ -428,9 +435,38 @@ public void resetState() { @Override public CheckpointName getCheckpointName() { + if (mBlockMetaStore instanceof Checkpointed) { + return ((Checkpointed) mBlockMetaStore).getCheckpointName(); + } return CheckpointName.BLOCK_MASTER; } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + if (mBlockMetaStore instanceof Checkpointed) { + SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled(); + containerIdJournal.processJournalEntry(getContainerIdJournalEntry()); + return CompletableFuture.allOf(( + (Checkpointed) mBlockMetaStore).writeToCheckpoint(directory, executorService), + containerIdJournal.writeToCheckpoint(directory, executorService)); + } + return super.writeToCheckpoint(directory, executorService); + } + + @Override + public CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + if (mBlockMetaStore instanceof Checkpointed) { + SingleEntryJournaled containerIdJournal = new DefaultBlockMasterContainerIdJournaled(); + return CompletableFuture.allOf(( + (Checkpointed) mBlockMetaStore).restoreFromCheckpoint(directory, executorService), + containerIdJournal.restoreFromCheckpoint(directory, executorService) + .thenRun(() -> processJournalEntry(containerIdJournal.getEntry()))); + } + return super.restoreFromCheckpoint(directory, executorService); + } + @Override public CloseableIterator getJournalEntryIterator() { CloseableIterator blockStoreIterator = mBlockMetaStore.getCloseableIterator(); diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java 
b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java new file mode 100644 index 000000000000..363e7c510883 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMasterContainerIdJournaled.java @@ -0,0 +1,25 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.block; + +import alluxio.master.journal.SingleEntryJournaled; +import alluxio.master.journal.checkpoint.CheckpointName; + +/** + * Writes a single journal entry - essential to the DefaultBlockMaster - to a checkpoint. 
+ */ +public class DefaultBlockMasterContainerIdJournaled extends SingleEntryJournaled { + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.BLOCK_MASTER_CONTAINER_ID; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java index be7fe322933b..8585053e9dd9 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTreePersistentState.java @@ -56,6 +56,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Path; @@ -68,8 +69,11 @@ import java.util.Optional; import java.util.Queue; import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nullable; /** @@ -824,6 +828,15 @@ public void resetState() { mOpIdCache.invalidateAll(); } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(Stream.of(mInodeStore, mPinnedInodeFileIds, + mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter) + .map(journaled -> journaled.writeToCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { // mTtlBuckets must come after mInodeStore so that it can query the inode store to resolve inode @@ -832,6 +845,15 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter)); } + @Override + 
public CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.allOf(Stream.of(mInodeStore, mPinnedInodeFileIds, + mReplicationLimitedFileIds, mToBePersistedIds, mTtlBuckets, mInodeCounter) + .map(journaled -> journaled.restoreFromCheckpoint(directory, executorService)) + .toArray(CompletableFuture[]::new)); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { // mTtlBuckets must come after mInodeStore so that it can query the inode store to resolve inode diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java index d7b53bf8cced..974f5ac7d305 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java @@ -12,32 +12,36 @@ package alluxio.master.journal.tool; import alluxio.master.journal.JournalEntryAssociation; -import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.checkpoint.OptimizedCheckpointInputStream; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.master.journal.raft.RaftJournalUtils; +import alluxio.master.journal.raft.SnapshotDirStateMachineStorage; import alluxio.proto.journal.Journal; import alluxio.util.io.FileUtils; import com.google.common.base.Preconditions; +import org.apache.ratis.io.MD5Hash; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.raftlog.segmented.LogSegment; import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; +import org.apache.ratis.server.storage.FileInfo; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.StorageImplUtils; +import org.apache.ratis.statemachine.SnapshotInfo; import 
org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.apache.ratis.util.MD5FileUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedOutputStream; -import java.io.DataInputStream; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.nio.file.Path; import java.nio.file.Paths; +import java.security.MessageDigest; import java.util.List; /** @@ -121,27 +125,31 @@ private void readRatisSnapshotFromDir() throws IOException { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize())) { storage.initialize(); - SimpleStateMachineStorage stateMachineStorage = new SimpleStateMachineStorage(); + SnapshotDirStateMachineStorage stateMachineStorage = new SnapshotDirStateMachineStorage(); stateMachineStorage.init(storage); - SingleFileSnapshotInfo currentSnapshot = stateMachineStorage.getLatestSnapshot(); + SnapshotInfo currentSnapshot = stateMachineStorage.getLatestSnapshot(); if (currentSnapshot == null) { LOG.debug("No snapshot found"); return; } - final File snapshotFile = currentSnapshot.getFile().getPath().toFile(); + File snapshotDir = new File(stateMachineStorage.getSnapshotDir(), + SimpleStateMachineStorage.getSnapshotFileName(currentSnapshot.getTerm(), + currentSnapshot.getIndex())); String checkpointPath = String.format("%s-%s-%s", mCheckpointsDir, currentSnapshot.getIndex(), - snapshotFile.lastModified()); + snapshotDir.lastModified()); + new File(checkpointPath).mkdirs(); - try (DataInputStream inputStream = new DataInputStream(new FileInputStream(snapshotFile))) { - LOG.debug("Reading snapshot-Id: {}", inputStream.readLong()); - try (CheckpointInputStream checkpointStream = new CheckpointInputStream(inputStream)) { - readCheckpoint(checkpointStream, 
Paths.get(checkpointPath)); - } catch (Exception e) { - LOG.error("Failed to read snapshot from journal.", e); + for (FileInfo file : currentSnapshot.getFiles()) { + if (file.getFileDigest() != null) { + File snapshotFile = new File(snapshotDir, file.getPath().toString()); + Path humanReadableFile = Paths.get(checkpointPath, file.getPath().toString()); + MessageDigest md5 = MD5Hash.getDigester(); + try (OptimizedCheckpointInputStream is = + new OptimizedCheckpointInputStream(snapshotFile, md5)) { + readCheckpoint(is, humanReadableFile); + } + MD5FileUtil.verifySavedMD5(snapshotFile, new MD5Hash(md5.digest())); } - } catch (Exception e) { - LOG.error("Failed to load snapshot {}", snapshotFile, e); - throw e; } } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 42f966c60bde..010e668acd5a 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -277,6 +277,12 @@ public Map getServices() { return services; } + @Override + public Map getStandbyServices() { + // for snapshot propagation + return new HashMap<>(mJournalSystem.getJournalServices()); + } + @Override public String getName() { return Constants.META_MASTER_NAME; diff --git a/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java index 63a308f74ea3..614e5683c34a 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/caching/CachingInodeStore.java @@ -19,6 +19,8 @@ import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import 
alluxio.grpc.ErrorType; import alluxio.master.file.meta.Edge; import alluxio.master.file.meta.EdgeEntry; import alluxio.master.file.meta.Inode; @@ -47,10 +49,12 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.common.io.Closer; +import io.grpc.Status; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Closeable; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.Collection; @@ -63,8 +67,10 @@ import java.util.Optional; import java.util.Set; import java.util.SortedMap; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -265,6 +271,24 @@ public CheckpointName getCheckpointName() { return CheckpointName.CACHING_INODE_STORE; } + @Override + public CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.info("Flushing inodes to backing store"); + try { + mInodeCache.flush(); + mEdgeCache.flush(); + } catch (InterruptedException e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to restore snapshot %s", getCheckpointName()), + null, ErrorType.Internal, false); + } + LOG.info("Finished flushing inodes to backing store"); + mBackingStore.writeToCheckpoint(directory, executorService).join(); + }, executorService); + } + @Override public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { LOG.info("Flushing inodes to backing store"); @@ -274,6 +298,18 @@ public void writeToCheckpoint(OutputStream output) throws IOException, Interrupt mBackingStore.writeToCheckpoint(output); } + @Override + public CompletableFuture 
restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + mInodeCache.clear(); + mEdgeCache.clear(); + mListingCache.clear(); + mBackingStore.restoreFromCheckpoint(directory, executorService).join(); + mBackingStoreEmpty = false; + }, executorService); + } + @Override public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { mInodeCache.clear(); diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java index d0f60304ca2e..2de8650c743b 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java @@ -15,6 +15,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.metastore.BlockMetaStore; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; @@ -60,7 +61,7 @@ * Block store backed by RocksDB. */ @ThreadSafe -public class RocksBlockMetaStore implements BlockMetaStore { +public class RocksBlockMetaStore implements BlockMetaStore, RocksCheckpointed { private static final Logger LOG = LoggerFactory.getLogger(RocksBlockMetaStore.class); private static final String BLOCKS_DB_NAME = "blocks"; private static final String BLOCK_META_COLUMN = "block-meta"; @@ -119,16 +120,20 @@ && new String(columns.get(2).getName()).equals(BLOCK_LOCATIONS_COLUMN), .setCreateMissingColumnFamilies(true) .setCreateIfMissing(true) .setMaxOpenFiles(-1); + // This is a field instead of a constant as it depends on the call to RocksDB.loadLibrary(). 
+ CompressionType compressionType = + Configuration.getEnum(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, + CompressionType.class); columns.add(new ColumnFamilyDescriptor(BLOCK_META_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by block id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains single value - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); columns.add(new ColumnFamilyDescriptor(BLOCK_LOCATIONS_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by block id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains worker info - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); } mToClose.addAll(columns.stream().map( @@ -395,4 +400,14 @@ public CloseableIterator getCloseableIterator() { private RocksDB db() { return mRocksStore.getDb(); } + + @Override + public RocksStore getRocksStore() { + return mRocksStore; + } + + @Override + public CheckpointName getCheckpointName() { + return CheckpointName.BLOCK_MASTER; + } } diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java new file mode 100644 index 000000000000..cdea4e092f10 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java @@ -0,0 +1,80 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.checkpoint.Checkpointed; + +import io.grpc.Status; +import org.rocksdb.RocksDBException; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +/** + * Provides default implementations for checkpointing RocksDB databases. + */ +public interface RocksCheckpointed extends Checkpointed { + /** + * @return the {@link RocksStore} that will produce a checkpoint + */ + RocksStore getRocksStore(); + + @Override + default CompletableFuture writeToCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("taking {} snapshot started", getCheckpointName()); + File subDir = new File(directory, getCheckpointName().toString()); + try { + getRocksStore().writeToCheckpoint(subDir); + } catch (RocksDBException e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s in dir %s", getCheckpointName(), directory), + e, ErrorType.Internal, false); + } + LOG.debug("taking {} snapshot finished", getCheckpointName()); + }, executorService); + } + + @Override + default void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { + getRocksStore().writeToCheckpoint(output); + } + + @Override + default 
CompletableFuture restoreFromCheckpoint(File directory, + ExecutorService executorService) { + return CompletableFuture.runAsync(() -> { + LOG.debug("loading {} snapshot started", getCheckpointName()); + File subDir = new File(directory, getCheckpointName().toString()); + try { + getRocksStore().restoreFromCheckpoint(subDir); + } catch (Exception e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to restore snapshot %s", getCheckpointName()), + e, ErrorType.Internal, false); + } + LOG.debug("loading {} snapshot finished", getCheckpointName()); + }, executorService); + } + + @Override + default void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + getRocksStore().restoreFromCheckpoint(input); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java index 55a5110c0a1b..c8162df5e6af 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java @@ -21,7 +21,6 @@ import alluxio.master.file.meta.InodeDirectoryView; import alluxio.master.file.meta.InodeView; import alluxio.master.file.meta.MutableInode; -import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointName; import alluxio.master.metastore.InodeStore; import alluxio.master.metastore.ReadOption; @@ -52,8 +51,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -76,7 +73,7 @@ * File store backed by RocksDB. 
*/ @ThreadSafe -public class RocksInodeStore implements InodeStore { +public class RocksInodeStore implements InodeStore, RocksCheckpointed { private static final Logger LOG = LoggerFactory.getLogger(RocksInodeStore.class); private static final String INODES_DB_NAME = "inodes"; private static final String INODES_COLUMN = "inodes"; @@ -138,16 +135,20 @@ && new String(columns.get(2).getName()).equals(EDGES_COLUMN), .setCreateMissingColumnFamilies(true) .setCreateIfMissing(true) .setMaxOpenFiles(-1); + // This is a field instead of a constant because it depends on RocksDB.loadLibrary(). + CompressionType compressionType = + Configuration.getEnum(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE, + CompressionType.class); columns.add(new ColumnFamilyDescriptor(INODES_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by inode id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket contains children ids - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); columns.add(new ColumnFamilyDescriptor(EDGES_COLUMN.getBytes(), new ColumnFamilyOptions() .useFixedLengthPrefixExtractor(Longs.BYTES) // allows memtable buckets by inode id .setMemTableConfig(new HashLinkedListMemTableConfig()) // bucket only contains an id - .setCompressionType(CompressionType.NO_COMPRESSION))); + .setCompressionType(compressionType))); } mToClose.addAll(columns.stream().map( ColumnFamilyDescriptor::getOptions).collect(Collectors.toList())); @@ -513,13 +514,8 @@ public CheckpointName getCheckpointName() { } @Override - public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { - mRocksStore.writeToCheckpoint(output); - } - - @Override - public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { - mRocksStore.restoreFromCheckpoint(input); + public RocksStore getRocksStore() { + return mRocksStore; } private 
class RocksWriteBatch implements WriteBatch { diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java index e3ffb01f9a15..fe83506adea9 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java @@ -18,8 +18,8 @@ import alluxio.master.journal.checkpoint.CheckpointOutputStream; import alluxio.master.journal.checkpoint.CheckpointType; import alluxio.retry.TimeoutRetry; -import alluxio.util.ParallelZipUtils; -import alluxio.util.TarUtils; +import alluxio.util.compression.ParallelZipUtils; +import alluxio.util.compression.TarUtils; import alluxio.util.io.FileUtils; import com.google.common.base.Preconditions; @@ -73,7 +73,7 @@ public final class RocksStore implements Closeable { private final DBOptions mDbOpts; private final int mCompressLevel = Configuration.getInt( - PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL); + PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); private final boolean mParallelBackup = Configuration.getBoolean( PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP); @@ -195,6 +195,15 @@ private void createDb() throws RocksDBException { LOG.info("Opened rocks database under path {}", mDbPath); } + /** + * Writes a checkpoint under the specified directory. + * @param directory that the checkpoint will be written under + * @throws RocksDBException if it encounters and error when writing the checkpoint + */ + public synchronized void writeToCheckpoint(File directory) throws RocksDBException { + mCheckpoint.createCheckpoint(directory.getPath()); + } + /** * Writes a checkpoint of the database's content to the given output stream. 
* @@ -230,6 +239,24 @@ public synchronized void writeToCheckpoint(OutputStream output) FileUtils.deletePathRecursively(mDbCheckpointPath); } + /** + * Restores RocksDB state from a checkpoint at the provided location. Moves the directory to a + * permanent location, restores RocksDB state, and then immediately takes a new snapshot in the + * original location as replacement. + * @param directory where the checkpoint is located + * @throws RocksDBException if rocks encounters a problem + * @throws IOException if moving files around encounters a problem + */ + public synchronized void restoreFromCheckpoint(File directory) + throws RocksDBException, IOException { + stopDb(); + File dbPath = new File(mDbPath); + org.apache.commons.io.FileUtils.deleteDirectory(dbPath); + org.apache.commons.io.FileUtils.moveDirectory(directory, dbPath); + createDb(); + writeToCheckpoint(directory); + } + /** * Restores the database from a checkpoint. * diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java index 629c48038a6f..f51f2089d72b 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerService.java @@ -75,11 +75,29 @@ protected RpcServerService(InetSocketAddress bindAddress, MasterProcess masterPr mMasterProcess = masterProcess; } + protected final synchronized boolean isGrpcServerServing() { + return mGrpcServer != null && mGrpcServer.isServing(); + } + /** * @return whether the grpc server is serving or not */ public synchronized boolean isServing() { - return mGrpcServer != null && mGrpcServer.isServing(); + return isServingLeader() || isServingStandby(); + } + + /** + * @return whether the grpc server is serving in leader mode + */ + public synchronized boolean isServingLeader() { + return isGrpcServerServing(); + } + + /** + * @return 
whether the grpc server is serving in standby mode + */ + public synchronized boolean isServingStandby() { + return false; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java index 075dfc7fc739..f25743f24851 100644 --- a/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java +++ b/core/server/master/src/main/java/alluxio/master/service/rpc/RpcServerStandbyGrpcService.java @@ -41,6 +41,16 @@ protected RpcServerStandbyGrpcService( super(bindAddress, masterProcess, masterRegistry); } + @Override + public synchronized boolean isServingLeader() { + return mIsPromoted && isGrpcServerServing(); + } + + @Override + public synchronized boolean isServingStandby() { + return !mIsPromoted && isGrpcServerServing(); + } + @Override public synchronized void start() { LOG.info("Starting {}", this.getClass().getSimpleName()); diff --git a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java index 4c1bd0b36b98..2ef1438ad8fb 100644 --- a/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java +++ b/core/server/master/src/test/java/alluxio/master/AlluxioMasterProcessTest.java @@ -127,7 +127,10 @@ public void startStopPrimary() throws Exception { } }); t.start(); + master.waitForReady(10_000); startStopTest(master); + t.interrupt(); + t.join(); } @Test @@ -275,8 +278,6 @@ public void startStopStandbyStandbyServer() throws Exception { } }); t.start(); - final int TIMEOUT_MS = 10_000; - master.waitForGrpcServerReady(TIMEOUT_MS); startStopTest(master, true, Configuration.getBoolean(PropertyKey.STANDBY_MASTER_WEB_ENABLED), @@ -296,7 +297,10 @@ private void startStopTest(AlluxioMasterProcess master, boolean expectGrpcServic 
assertTrue(isBound(master.getRpcAddress().getPort())); assertTrue(isBound(master.getWebAddress().getPort())); if (expectGrpcServiceStarted) { - assertTrue(master.waitForGrpcServerReady(TIMEOUT_MS)); + CommonUtils.waitFor("grpc server to serve", + () -> master.mServices.stream().anyMatch(service -> service instanceof RpcServerService + && ((RpcServerService) service).isServing()), + WaitForOptions.defaults().setTimeoutMs(TIMEOUT_MS)); } if (expectWebServiceStarted) { assertTrue(master.waitForWebServerReady(TIMEOUT_MS)); diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java index 2de7f5a1e16d..20b0af9ea91f 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterWorkerServiceHandlerTest.java @@ -28,9 +28,11 @@ import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; import alluxio.grpc.RegisterWorkerPResponse; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.util.SleepUtils; @@ -84,7 +86,8 @@ public void initServiceHandler(boolean leaseEnabled) throws Exception { } mRegistry = new MasterRegistry(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext(new NoopJournalSystem(), + null, new AlwaysPrimaryPrimarySelector()); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mClock = new ManualClock(); mExecutorService = diff --git 
a/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java b/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java new file mode 100644 index 000000000000..e49963eb88cd --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/block/DefaultBlockMasterCheckpointTest.java @@ -0,0 +1,125 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.block; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.BlockInfoException; +import alluxio.master.CoreMasterContext; +import alluxio.master.MasterRegistry; +import alluxio.master.MasterTestUtils; +import alluxio.master.MasterUtils; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.journal.noop.NoopJournalSystem; +import alluxio.master.metastore.MetastoreType; +import alluxio.master.metrics.MetricsMaster; +import alluxio.master.metrics.MetricsMasterFactory; +import alluxio.proto.journal.Block; +import alluxio.proto.journal.Journal; +import alluxio.wire.BlockInfo; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Collection; +import 
java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@RunWith(Parameterized.class) +public class DefaultBlockMasterCheckpointTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(MetastoreType.HEAP, MetastoreType.ROCKS); + } + + @Parameterized.Parameter + public MetastoreType mMetastoreType; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private DefaultBlockMaster mBlockMaster; + + private final long mNextContainerId = 1; + private final long mBlockId1 = 2; + private final long mBlockId2 = 3; + private final long mBlockLength = 4; + + private DefaultBlockMaster createDefaultBlockMaster() throws IOException { + CoreMasterContext context = MasterTestUtils.testMasterContext(new NoopJournalSystem(), null, + MasterUtils.getBlockStoreFactory(mFolder.newFolder().getAbsolutePath()), + MasterUtils.getInodeStoreFactory(mFolder.newFolder().getAbsolutePath())); + MetricsMasterFactory metricsMasterFactory = new MetricsMasterFactory(); + MetricsMaster metricsMaster = metricsMasterFactory.create(new MasterRegistry(), context); + return new DefaultBlockMaster(metricsMaster, context); + } + + @Before + public void before() throws IOException { + Configuration.set(PropertyKey.MASTER_BLOCK_METASTORE, mMetastoreType); + mBlockMaster = createDefaultBlockMaster(); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockContainerIdGenerator(Block.BlockContainerIdGeneratorEntry.newBuilder() + .setNextContainerId(mNextContainerId)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockInfo(Block.BlockInfoEntry.newBuilder() + .setBlockId(mBlockId1)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + .setBlockInfo(Block.BlockInfoEntry.newBuilder() + .setBlockId(mBlockId2) + .setLength(mBlockLength)).build()); + mBlockMaster.processJournalEntry(Journal.JournalEntry.newBuilder() + 
.setDeleteBlock(Block.DeleteBlockEntry.newBuilder() + .setBlockId(mBlockId1)).build()); + } + + @Test + public void testOutputStream() throws IOException, InterruptedException, BlockInfoException { + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + mBlockMaster.writeToCheckpoint(outputStream); + } + DefaultBlockMaster blockMaster = createDefaultBlockMaster(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + blockMaster.restoreFromCheckpoint(inputStream); + } + Assert.assertEquals(mNextContainerId, blockMaster.getJournaledNextContainerId()); + Assert.assertThrows(BlockInfoException.class, () -> blockMaster.getBlockInfo(mBlockId1)); + BlockInfo blockInfo = blockMaster.getBlockInfo(mBlockId2); + Assert.assertEquals(mBlockLength, blockInfo.getLength()); + } + + @Test + public void testDirectory() throws IOException, BlockInfoException { + File dir = mFolder.newFolder(); + ExecutorService executor = Executors.newSingleThreadExecutor(); + mBlockMaster.writeToCheckpoint(dir, executor).join(); + DefaultBlockMaster blockMaster = createDefaultBlockMaster(); + blockMaster.restoreFromCheckpoint(dir, executor).join(); + + Assert.assertEquals(mNextContainerId, blockMaster.getJournaledNextContainerId()); + Assert.assertThrows(BlockInfoException.class, () -> blockMaster.getBlockInfo(mBlockId1)); + BlockInfo blockInfo = blockMaster.getBlockInfo(mBlockId2); + Assert.assertEquals(mBlockLength, blockInfo.getLength()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java new file mode 100644 index 000000000000..c7ed06f3ba25 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/meta/CheckpointedIdHashSetTest.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache 
License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.meta; + +import alluxio.master.journal.checkpoint.CheckpointInputStream; + +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class CheckpointedIdHashSetTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new PinnedInodeFileIds(), new ReplicationLimitedFileIds(), + new ToBePersistedFileIds()); + } + + @Parameterized.Parameter + public CheckpointedIdHashSet mIdHashSet; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + @Test + public void test() throws IOException { + for (long i = 0L; i < 1_000_000L; i += 5762L) { + mIdHashSet.add(i); + } + List copyList = new ArrayList<>(mIdHashSet); + File file = mFolder.newFile(); + try (OutputStream outputStream = Files.newOutputStream(file.toPath())) { + mIdHashSet.writeToCheckpoint(outputStream); + } + mIdHashSet.clear(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(file.toPath()))) { + mIdHashSet.restoreFromCheckpoint(inputStream); + } + Assert.assertTrue(mIdHashSet.containsAll(copyList)); + } +} diff --git 
a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java index 45c4db8333d1..40bab536216f 100644 --- a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java @@ -23,6 +23,7 @@ import alluxio.grpc.StorageList; import alluxio.job.plan.replicate.ReplicationHandler; import alluxio.job.wire.Status; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; @@ -165,7 +166,8 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS); MasterRegistry registry = new MasterRegistry(); JournalSystem journalSystem = JournalTestUtils.createJournalSystem(mTestFolder); - mContext = MasterTestUtils.testMasterContext(journalSystem); + mContext = MasterTestUtils.testMasterContext(journalSystem, + null, new AlwaysPrimaryPrimarySelector()); new MetricsMasterFactory().create(registry, mContext); mBlockMaster = new BlockMasterFactory().create(registry, mContext); InodeDirectoryIdGenerator directoryIdGenerator = new InodeDirectoryIdGenerator(mBlockMaster); diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java index 35323c1c564c..d0e38fe72ba5 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalSystemMetricsTest.java @@ -66,7 +66,8 @@ public void journalStateMachineMetrics() throws Exception { MetricKey.MASTER_JOURNAL_LAST_APPLIED_COMMIT_INDEX.getName(), 
MetricKey.MASTER_JOURNAL_CHECKPOINT_WARN.getName(), }; - JournalStateMachine stateMachine = new JournalStateMachine(system.getJournals(), system); + JournalStateMachine stateMachine = new JournalStateMachine(system.getJournals(), system, + new SnapshotDirStateMachineStorage()); for (String name : metricsNames) { assertNotNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); } @@ -74,7 +75,8 @@ public void journalStateMachineMetrics() throws Exception { for (String name : metricsNames) { assertNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); } - JournalStateMachine newStateMachine = new JournalStateMachine(system.getJournals(), system); + JournalStateMachine newStateMachine = new JournalStateMachine(system.getJournals(), system, + new SnapshotDirStateMachineStorage()); for (String name : metricsNames) { assertNotNull(MetricsSystem.METRIC_REGISTRY.getGauges().get(name)); } diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java deleted file mode 100644 index 639b5b113a54..000000000000 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/SnapshotReplicationManagerTest.java +++ /dev/null @@ -1,484 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. 
- */ - -package alluxio.master.journal.raft; - -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.argThat; - -import alluxio.ConfigurationRule; -import alluxio.conf.Configuration; -import alluxio.conf.PropertyKey; -import alluxio.grpc.JournalQueryRequest; -import alluxio.grpc.NetAddress; -import alluxio.grpc.QuorumServerInfo; -import alluxio.grpc.RaftJournalServiceGrpc; -import alluxio.grpc.SnapshotData; -import alluxio.grpc.UploadSnapshotPRequest; -import alluxio.grpc.UploadSnapshotPResponse; -import alluxio.util.CommonUtils; -import alluxio.util.WaitForOptions; -import alluxio.util.io.BufferUtils; - -import io.grpc.ManagedChannel; -import io.grpc.Server; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import io.grpc.inprocess.InProcessChannelBuilder; -import io.grpc.inprocess.InProcessServerBuilder; -import io.grpc.stub.StreamObserver; -import net.bytebuddy.utility.RandomString; -import org.apache.commons.io.FileUtils; -import org.apache.ratis.protocol.Message; -import org.apache.ratis.protocol.RaftClientReply; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.server.RaftServerConfigKeys; -import org.apache.ratis.server.protocol.TermIndex; -import org.apache.ratis.server.storage.RaftStorage; -import org.apache.ratis.server.storage.StorageImplUtils; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.junit.After; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.mockito.Mockito; -import org.mockito.stubbing.Answer; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; -import 
java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; - -public class SnapshotReplicationManagerTest { - private static final int SNAPSHOT_SIZE = 100_000; - private static final int DEFAULT_SNAPSHOT_TERM = 0; - private static final int DEFAULT_SNAPSHOT_INDEX = 1; - - @Rule - public TemporaryFolder mFolder = new TemporaryFolder(); - - @Rule - public ConfigurationRule mConfigurationRule = - new ConfigurationRule(PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_CHUNK_SIZE, - "32KB", Configuration.modifiableGlobal()); - - private final WaitForOptions mWaitOptions = WaitForOptions.defaults().setTimeoutMs(30_000); - private SnapshotReplicationManager mLeaderSnapshotManager; - private RaftJournalSystem mLeader; - private SimpleStateMachineStorage mLeaderStore; - private final Map mFollowers = new HashMap<>(); - - private RaftJournalServiceClient mClient; - private Server mServer; - - private void before(int numFollowers) throws Exception { - Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT, 550); - Configuration.set(PropertyKey.MASTER_JOURNAL_REQUEST_DATA_TIMEOUT, 550); - mLeader = Mockito.mock(RaftJournalSystem.class); - Mockito.when(mLeader.isLeader()).thenReturn(true); - Mockito.when(mLeader.getLocalPeerId()).thenReturn(RaftPeerId.getRaftPeerId("leader")); - mLeaderStore = getSimpleStateMachineStorage(); - mLeaderSnapshotManager = Mockito.spy(new SnapshotReplicationManager(mLeader, mLeaderStore)); - - String serverName = InProcessServerBuilder.generateName(); - mServer = InProcessServerBuilder.forName(serverName) - .directExecutor() - .addService(new RaftJournalServiceHandler(mLeaderSnapshotManager, null)).build(); - mServer.start(); - ManagedChannel channel = InProcessChannelBuilder.forName(serverName).directExecutor().build(); - RaftJournalServiceGrpc.RaftJournalServiceStub stub = RaftJournalServiceGrpc.newStub(channel); - // mock RaftJournalServiceClient - mClient = 
Mockito.mock(RaftJournalServiceClient.class); - Mockito.doNothing().when(mClient).close(); - // download rpc mock - Mockito.when(mClient.downloadSnapshot(any())).thenAnswer((args) -> { - StreamObserver responseObserver = args.getArgument(0, StreamObserver.class); - return stub.downloadSnapshot(responseObserver); - }); - // upload rpc mock - Mockito.when(mClient.uploadSnapshot(any())).thenAnswer((args) -> { - StreamObserver responseObserver = args.getArgument(0, StreamObserver.class); - return stub.uploadSnapshot(responseObserver); - }); - Mockito.doReturn(mClient).when(mLeaderSnapshotManager).createJournalServiceClient(); - - for (int i = 0; i < numFollowers; i++) { - Follower follower = new Follower(mClient); - mFollowers.put(follower.getRaftPeerId(), follower); - } - - List quorumServerInfos = mFollowers.values().stream().map(follower -> { - return QuorumServerInfo.newBuilder().setServerAddress( - NetAddress.newBuilder().setHost(follower.mHost).setRpcPort(follower.mRpcPort)).build(); - }).collect(Collectors.toList()); - - Mockito.when(mLeader.getQuorumServerInfoList()).thenReturn(quorumServerInfos); - Answer fn = (args) -> { - RaftPeerId peerId = args.getArgument(0, RaftPeerId.class); - Message message = args.getArgument(1, Message.class); - JournalQueryRequest queryRequest = JournalQueryRequest.parseFrom( - message.getContent().asReadOnlyByteBuffer()); - return CompletableFuture.supplyAsync(() -> { - CompletableFuture fut = CompletableFuture.supplyAsync(() -> { - Message response; - try { - response = mFollowers.get(peerId).mSnapshotManager.handleRequest(queryRequest); - } catch (IOException e) { - throw new CompletionException(e); - } - RaftClientReply reply = Mockito.mock(RaftClientReply.class); - Mockito.when(reply.getMessage()).thenReturn(response); - return reply; - }); - RaftClientReply result; - try { - if (args.getArguments().length == 3) { - result = fut.get(args.getArgument(2), TimeUnit.MILLISECONDS); - } else { - result = fut.get(); - } - return 
result; - } catch (Exception e) { - throw new CompletionException(e); - } - }); - }; - Mockito.when(mLeader.sendMessageAsync(any(), any())).thenAnswer(fn); - Mockito.when(mLeader.sendMessageAsync(any(), any(), anyLong())).thenAnswer(fn); - } - - private SimpleStateMachineStorage getSimpleStateMachineStorage() throws IOException { - RaftStorage rs = StorageImplUtils.newRaftStorage( - mFolder.newFolder(CommonUtils.randomAlphaNumString(6)), - RaftServerConfigKeys.Log.CorruptionPolicy.getDefault(), - RaftStorage.StartupOption.FORMAT, - RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); - rs.initialize(); - SimpleStateMachineStorage snapshotStore = new SimpleStateMachineStorage(); - snapshotStore.init(rs); - return snapshotStore; - } - - private void createSnapshotFile(SimpleStateMachineStorage storage) throws IOException { - createSnapshotFile(storage, DEFAULT_SNAPSHOT_TERM, DEFAULT_SNAPSHOT_INDEX); - } - - private void createSnapshotFile(SimpleStateMachineStorage storage, long term, long index) - throws IOException { - java.io.File file = storage.getSnapshotFile(term, index); - FileUtils.writeByteArrayToFile(file, BufferUtils.getIncreasingByteArray(SNAPSHOT_SIZE)); - storage.loadLatestSnapshot(); - } - - private void validateSnapshotFile(SimpleStateMachineStorage storage) throws IOException { - validateSnapshotFile(storage, DEFAULT_SNAPSHOT_TERM, DEFAULT_SNAPSHOT_INDEX); - } - - private void validateSnapshotFile(SimpleStateMachineStorage storage, long term, long index) - throws IOException { - SingleFileSnapshotInfo snapshot = storage.getLatestSnapshot(); - Assert.assertNotNull(snapshot); - Assert.assertEquals(TermIndex.valueOf(term, index), snapshot.getTermIndex()); - byte[] received = FileUtils.readFileToByteArray(snapshot.getFiles().get(0).getPath().toFile()); - Assert.assertTrue(BufferUtils.equalIncreasingByteArray(SNAPSHOT_SIZE, received)); - } - - @After - public void After() throws Exception { - mServer.shutdown(); - mServer.awaitTermination(); - } 
- - @Test - public void copySnapshotToLeader() throws Exception { - before(1); - Follower follower = mFollowers.values().stream().findFirst().get(); - createSnapshotFile(follower.mStore); - - Assert.assertNull(mLeaderStore.getLatestSnapshot()); - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - validateSnapshotFile(mLeaderStore); - } - - @Test - public void copySnapshotToFollower() throws Exception { - before(1); - createSnapshotFile(mLeaderStore); - - Follower follower = mFollowers.values().stream().findFirst().get(); - Assert.assertNull(follower.mStore.getLatestSnapshot()); - - follower.mSnapshotManager.installSnapshotFromLeader(); - - CommonUtils.waitFor("follower snapshot to complete", - () -> follower.mStore.getLatestSnapshot() != null, mWaitOptions); - validateSnapshotFile(follower.mStore); - } - - @Test - public void requestSnapshotEqualTermHigherIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets the best snapshot - validateSnapshotFile(mLeaderStore, 0, 2); - } - - @Test - public void failGetInfoEqualTermHigherIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 
snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - // the second follower will not reply to the getInfo request, so the leader will request from - // the first after a timeout - secondFollower.disableGetInfo(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and get the snapshot from the first follower - validateSnapshotFile(mLeaderStore, 0, 1); - } - - @Test - public void failSnapshotRequestEqualTermHigherIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - // the second follower will not start the snapshot upload, so the leader will request from the - // first after a timeout - secondFollower.disableFollowerUpload(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and get the snapshot from the first follower - validateSnapshotFile(mLeaderStore, 0, 1); - } - - @Test - public void failFailThenSuccess() throws Exception { - before(3); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore, 0, 1); - createSnapshotFile(secondFollower.mStore, 0, 1); - - firstFollower.disableFollowerUpload(); - secondFollower.disableGetInfo(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - try { - 
CommonUtils.waitForResult("upload failure", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower(), - (num) -> num == 1, - WaitForOptions.defaults().setInterval(10).setTimeoutMs(100)); - } catch (Exception e) { - // expected to fail: no snapshot could be uploaded - } - - Follower thirdFollower = followers.get(2); - createSnapshotFile(thirdFollower.mStore, 0, 2); - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - CommonUtils.waitForResult("upload failure", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower(), - (num) -> num == 2, mWaitOptions); - validateSnapshotFile(mLeaderStore, 0, 2); - } - - @Test - public void requestSnapshotHigherTermLowerIndex() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore, 1, 10); - createSnapshotFile(secondFollower.mStore, 2, 1); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets the best snapshot - validateSnapshotFile(mLeaderStore, 2, 1); - } - - @Test - public void installSnapshotsInSuccession() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - - for (int i = 2; i < 12; i++) { - if (i % 2 == 0) { - createSnapshotFile(secondFollower.mStore, 0, i); - secondFollower.notifySnapshotInstalled(); - } else { - createSnapshotFile(firstFollower.mStore, 0, i); - firstFollower.notifySnapshotInstalled(); - } - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - 
validateSnapshotFile(mLeaderStore, 0, i); - } - } - - /** - * Simulates a {@link SnapshotDownloader} error. - */ - @Test - public void downloadFailure() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - // make sure to error out when requesting the better snapshot from secondFollower - Mockito.doAnswer(mock -> { - SingleFileSnapshotInfo snapshot = secondFollower.mStore.getLatestSnapshot(); - StreamObserver responseObserver = - SnapshotUploader.forFollower(secondFollower.mStore, snapshot); - StreamObserver requestObserver = mClient - .uploadSnapshot(responseObserver); - requestObserver.onError(new IOException("failed snapshot upload")); - return null; - }).when(secondFollower.mSnapshotManager).sendSnapshotToLeader(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets second best snapshot - validateSnapshotFile(mLeaderStore); - } - - /** - * Simulates a {@link SnapshotUploader} error. 
- */ - @Test - public void uploadFailure() throws Exception { - before(2); - List followers = new ArrayList<>(mFollowers.values()); - Follower firstFollower = followers.get(0); - Follower secondFollower = followers.get(1); - - createSnapshotFile(firstFollower.mStore); // create default 0, 1 snapshot - createSnapshotFile(secondFollower.mStore, 0, 2); // preferable to the default 0, 1 snapshot - - // make sure to error out when requesting the better snapshot from secondFollower - Mockito.doAnswer(mock -> { - SingleFileSnapshotInfo snapshot = secondFollower.mStore.getLatestSnapshot(); - StreamObserver responseObserver = - SnapshotUploader.forFollower(secondFollower.mStore, snapshot); - StreamObserver requestObserver = mClient - .uploadSnapshot(responseObserver); - responseObserver.onError(new StatusRuntimeException(Status.UNAVAILABLE)); - requestObserver.onNext(UploadSnapshotPRequest.newBuilder() - .setData(SnapshotData.newBuilder() - .setSnapshotTerm(snapshot.getTerm()) - .setSnapshotIndex(snapshot.getIndex()) - .setOffset(0)) - .build()); - return null; - }).when(secondFollower.mSnapshotManager).sendSnapshotToLeader(); - - mLeaderSnapshotManager.maybeCopySnapshotFromFollower(); - - CommonUtils.waitFor("leader snapshot to complete", - () -> mLeaderSnapshotManager.maybeCopySnapshotFromFollower() != -1, mWaitOptions); - // verify that the leader still requests and gets second best snapshot - validateSnapshotFile(mLeaderStore); - } - - private class Follower { - final String mHost; - final int mRpcPort; - final SnapshotReplicationManager mSnapshotManager; - RaftJournalSystem mJournalSystem; - SimpleStateMachineStorage mStore; - - Follower(RaftJournalServiceClient client) throws IOException { - mHost = String.format("follower-%s", RandomString.make()); - mRpcPort = ThreadLocalRandom.current().nextInt(10_000, 99_999); - mStore = getSimpleStateMachineStorage(); - mJournalSystem = Mockito.mock(RaftJournalSystem.class); - mSnapshotManager = Mockito.spy(new 
SnapshotReplicationManager(mJournalSystem, mStore)); - Mockito.doReturn(client).when(mSnapshotManager).createJournalServiceClient(); - } - - void notifySnapshotInstalled() { - synchronized (mSnapshotManager) { - mSnapshotManager.notifyAll(); - } - } - - void disableFollowerUpload() throws IOException { - Mockito.doNothing().when(mSnapshotManager).sendSnapshotToLeader(); - } - - void disableGetInfo() throws IOException { - Mockito.doAnswer((args) -> { - synchronized (mSnapshotManager) { - // we sleep so nothing is returned - mSnapshotManager.wait(Configuration.global().getMs( - PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT)); - } - throw new IOException("get info disabled"); - }).when(mSnapshotManager) - .handleRequest(argThat(JournalQueryRequest::hasSnapshotInfoRequest)); - } - - RaftPeerId getRaftPeerId() { - return RaftPeerId.valueOf(String.format("%s_%d", mHost, mRpcPort)); - } - } -} diff --git a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java index b49adb4ee03e..ecde1ccea1b7 100644 --- a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java +++ b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java @@ -30,6 +30,7 @@ import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.StorageList; import alluxio.master.AlluxioMasterProcess; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterProcess; import alluxio.master.MasterRegistry; @@ -38,6 +39,7 @@ import alluxio.master.block.BlockMasterFactory; import alluxio.master.file.FileSystemMaster; import alluxio.master.file.FileSystemMasterFactory; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import 
alluxio.metrics.MetricKey; @@ -133,7 +135,8 @@ public void before() throws Exception { mMasterProcess = PowerMockito.mock(AlluxioMasterProcess.class); ServletContext context = mock(ServletContext.class); mRegistry = new MasterRegistry(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext(new NoopJournalSystem(), + null, new AlwaysPrimaryPrimarySelector()); mMetricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mRegistry.add(MetricsMaster.class, mMetricsMaster); registerMockUfs(); diff --git a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java new file mode 100644 index 000000000000..72f384feb9e9 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreCheckpointTest.java @@ -0,0 +1,129 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.metastore; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.MasterUtils; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.journal.checkpoint.CheckpointInputStream; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Collection; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@RunWith(Parameterized.class) +public class InodeStoreCheckpointTest { + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {MetastoreType.HEAP, 0}, + {MetastoreType.ROCKS, PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE.getDefaultValue()}, + {MetastoreType.ROCKS, 0} + }); + } + + @Parameterized.Parameter(0) + public MetastoreType mType; + + @Parameterized.Parameter(1) + public int mCacheSize; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + private InodeStore mBaseInodeStore; + private InodeStore mNewInodeStore; + + private final MutableInodeDirectory mRoot = + MutableInodeDirectory.create(0, -1, "", CreateDirectoryContext.defaults()); + + private InodeStore createInodeStore() throws IOException { + return MasterUtils.getInodeStoreFactory(mFolder.newFolder().getAbsolutePath()) + .apply(new InodeLockManager()); + } + + @Before + public void before() throws IOException { + 
Configuration.set(PropertyKey.MASTER_INODE_METASTORE, mType); + Configuration.set(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, mCacheSize); + CreateDirectoryContext c = CreateDirectoryContext.defaults(); + CreateFileContext cf = CreateFileContext.defaults(); + mBaseInodeStore = createInodeStore(); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(0, -1, "", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(1, 0, "one", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(2, 0, "two", c)); + mBaseInodeStore.writeNewInode(MutableInodeDirectory.create(3, 0, "three", c)); + mBaseInodeStore.remove(2L); + } + + @After + public void after() { + Optional root = mNewInodeStore.get(mRoot.getId()); + Assert.assertTrue(root.isPresent()); + Optional one = mNewInodeStore.get(1); + Assert.assertTrue(one.isPresent()); + Assert.assertEquals(0, one.get().getParentId()); + Assert.assertTrue(one.get().isDirectory()); + Assert.assertEquals("one", one.get().getName()); + Optional two = mNewInodeStore.get(2); + Assert.assertFalse(two.isPresent()); + Optional three = mNewInodeStore.get(3); + Assert.assertTrue(three.isPresent()); + Assert.assertEquals(0, three.get().getParentId()); + Assert.assertEquals("three", three.get().getName()); + + mBaseInodeStore.close(); + mNewInodeStore.close(); + } + + @Test + public void testOutputStream() throws IOException, InterruptedException { + File checkpoint = mFolder.newFile("checkpoint"); + try (OutputStream outputStream = Files.newOutputStream(checkpoint.toPath())) { + mBaseInodeStore.writeToCheckpoint(outputStream); + } + mNewInodeStore = createInodeStore(); + try (CheckpointInputStream inputStream = + new CheckpointInputStream(Files.newInputStream(checkpoint.toPath()))) { + mNewInodeStore.restoreFromCheckpoint(inputStream); + } + } + + @Test + public void testDirectory() throws IOException { + File dir = mFolder.newFolder("checkpoint"); + ExecutorService executor = Executors.newFixedThreadPool(2); + 
mBaseInodeStore.writeToCheckpoint(dir, executor).join(); + mNewInodeStore = createInodeStore(); + mNewInodeStore.restoreFromCheckpoint(dir, executor).join(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java index 2ae2ffac6c34..f043d72b1bae 100644 --- a/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java +++ b/core/server/master/src/test/java/alluxio/master/service/rpc/RpcServerServiceTest.java @@ -11,9 +11,12 @@ package alluxio.master.service.rpc; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.AlluxioMasterProcess; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.powermock.core.classloader.annotations.PrepareForTest; @@ -25,6 +28,11 @@ @RunWith(PowerMockRunner.class) @PrepareForTest(AlluxioMasterProcess.class) public class RpcServerServiceTest extends RpcServerServiceTestBase { + @Before + public void before() { + Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, false); + } + @Test public void primaryOnlyTest() { RpcServerService service = diff --git a/core/transport/src/main/proto/grpc/common.proto b/core/transport/src/main/proto/grpc/common.proto index 94bf1db6f8e5..b0aa380a75be 100644 --- a/core/transport/src/main/proto/grpc/common.proto +++ b/core/transport/src/main/proto/grpc/common.proto @@ -65,6 +65,8 @@ enum MetricType { // METER represents a metric value at a _rate_. The value of the metric varies with the time over which events are // recorded METER = 2; + // HISTOGRAM gives statistics about the value of past occurrences of an event. + HISTOGRAM = 5; // TIMER represents a histogram of the rate of the specified events. TIMER = 3; // EXECUTOR_SERVICE represents an executor service. 
diff --git a/core/transport/src/main/proto/grpc/raft_journal.proto b/core/transport/src/main/proto/grpc/raft_journal.proto index c6f6dc5b2001..09168749e17c 100644 --- a/core/transport/src/main/proto/grpc/raft_journal.proto +++ b/core/transport/src/main/proto/grpc/raft_journal.proto @@ -36,6 +36,7 @@ message GetSnapshotRequest { message SnapshotMetadata { optional int64 snapshotTerm = 1; optional int64 snapshotIndex = 2; + optional bool exists = 3; } message SnapshotData { @@ -62,18 +63,32 @@ message DownloadSnapshotPResponse { optional SnapshotData data = 1; } +message LatestSnapshotInfoPRequest {} + /** * This interface contains raft service endpoints for Alluxio masters. */ service RaftJournalService { /** + * Deprecated. * Uploads a snapshot to primary master. */ rpc UploadSnapshot (stream UploadSnapshotPRequest) returns (stream UploadSnapshotPResponse); /** + * Deprecated. * Downloads a snapshot from primary master. */ rpc DownloadSnapshot (stream DownloadSnapshotPRequest) returns (stream DownloadSnapshotPResponse); + + /** + * Requests information about snapshots on a particular machine. + */ + rpc RequestLatestSnapshotInfo(LatestSnapshotInfoPRequest) returns (SnapshotMetadata) {} + + /** + * Request to download the snapshot information from a particular machine. 
+ */ + rpc RequestLatestSnapshotData(SnapshotMetadata) returns (stream SnapshotData) {} } diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index fa24040577ba..12d48a5d5116 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -1572,6 +1572,10 @@ "name": "METER", "integer": 2 }, + { + "name": "HISTOGRAM", + "integer": 5 + }, { "name": "TIMER", "integer": 3 @@ -6702,6 +6706,11 @@ "id": 2, "name": "snapshotIndex", "type": "int64" + }, + { + "id": 3, + "name": "exists", + "type": "bool" } ] }, @@ -6774,6 +6783,9 @@ "type": "SnapshotData" } ] + }, + { + "name": "LatestSnapshotInfoPRequest" } ], "services": [ @@ -6793,6 +6805,17 @@ "out_type": "DownloadSnapshotPResponse", "in_streamed": true, "out_streamed": true + }, + { + "name": "RequestLatestSnapshotInfo", + "in_type": "LatestSnapshotInfoPRequest", + "out_type": "SnapshotMetadata" + }, + { + "name": "RequestLatestSnapshotData", + "in_type": "SnapshotMetadata", + "out_type": "SnapshotData", + "out_streamed": true } ] } diff --git a/microbench/src/main/java/alluxio/snapshot/SnapshotBench.java b/microbench/src/main/java/alluxio/snapshot/SnapshotBench.java new file mode 100644 index 000000000000..837f60981720 --- /dev/null +++ b/microbench/src/main/java/alluxio/snapshot/SnapshotBench.java @@ -0,0 +1,141 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.snapshot; + +import alluxio.AlluxioURI; +import alluxio.annotation.SuppressFBWarnings; +import alluxio.concurrent.jsr.CompletableFuture; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.NodeState; +import alluxio.master.AlluxioMasterProcess; +import alluxio.master.StateLockOptions; +import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.CompleteFileContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.journal.JournalType; +import alluxio.master.journal.raft.JournalStateMachine; +import alluxio.master.journal.raft.RaftJournalSystem; +import alluxio.master.journal.raft.RaftPrimarySelector; +import alluxio.resource.LockResource; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.LogManager; +import org.junit.rules.TemporaryFolder; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.CommandLineOptions; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; + +@SuppressFBWarnings("UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR") +public class SnapshotBench { + @State(Scope.Benchmark) + public static class Snapshot { + TemporaryFolder mFolder = new TemporaryFolder(); + AlluxioMasterProcess mMasterProcess; + JournalStateMachine mStateMachine; + RaftPrimarySelector mPrimarySelector; + CompletableFuture mLifeCycle; + + 
@Setup(Level.Trial) + public void setup() throws Exception { + LogManager.getRootLogger().setLevel(org.apache.log4j.Level.ERROR); + + mFolder.create(); + Configuration.set(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED); + Configuration.set(PropertyKey.MASTER_JOURNAL_FOLDER, mFolder.newFolder("journal")); + Configuration.set(PropertyKey.MASTER_METASTORE_DIR, mFolder.newFolder("metastore")); + Configuration.set(PropertyKey.SECURITY_AUTHENTICATION_TYPE, "NOSASL"); + Configuration.set(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED, false); + mMasterProcess = AlluxioMasterProcess.Factory.create(); + RaftJournalSystem journalSystem = (RaftJournalSystem) mMasterProcess + .getMaster(FileSystemMaster.class).getMasterContext().getJournalSystem(); + mPrimarySelector = (RaftPrimarySelector) journalSystem.getPrimarySelector(); + mLifeCycle = CompletableFuture.runAsync(() -> { + try { + mMasterProcess.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + mMasterProcess.waitForReady(10_000); + Field f = journalSystem.getClass().getDeclaredField("mStateMachine"); + f.setAccessible(true); + mStateMachine = (JournalStateMachine) f.get(journalSystem); + + FileSystemMaster master = mMasterProcess.getMaster(FileSystemMaster.class); + for (int i = 0; i < 1_000_000; i++) { + if (i % 100_000 == 0) { + System.out.printf("Creating file%d%n", i); + } + AlluxioURI alluxioURI = new AlluxioURI("/file" + i); + master.createFile(alluxioURI, CreateFileContext.defaults()); + master.completeFile(alluxioURI, CompleteFileContext.defaults()); + } + System.out.println("getting state lock"); + LockResource lr = master.getMasterContext().getStateLockManager() + .lockExclusive(StateLockOptions.defaults()); + lr.close(); + System.out.println("Setup complete"); + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception { + mMasterProcess.stop(); + mPrimarySelector.notifyStateChanged(NodeState.STANDBY); + mLifeCycle.join(); + mFolder.delete(); + 
System.out.println("Tear down complete"); + } + + @TearDown(Level.Invocation) + public void tearDownIteration() throws IOException { + File snapshotDir = mStateMachine.getStateMachineStorage().getSnapshotDir(); + FileUtils.cleanDirectory(snapshotDir); + } + } + + @Benchmark + @Warmup(iterations = 0) + @Measurement(iterations = 1) + public void snapshot(Blackhole bh, Snapshot snapshot) { + System.out.println("Taking snapshot"); + bh.consume(snapshot.mStateMachine.takeLocalSnapshot(true)); + System.out.println("Took snapshot"); + } + + public static void main(String[] args) throws Exception { + Options argsCli = new CommandLineOptions(args); + Options opts = new OptionsBuilder() + .forks(0) + .parent(argsCli) + .include(SnapshotBench.class.getName()) + .result("results.json") + .resultFormat(ResultFormatType.JSON) + .build(); + new Runner(opts).run(); + } +} diff --git a/minicluster/src/main/java/alluxio/master/LocalAlluxioMaster.java b/minicluster/src/main/java/alluxio/master/LocalAlluxioMaster.java index 0a4c420e0488..49f70084cbfd 100644 --- a/minicluster/src/main/java/alluxio/master/LocalAlluxioMaster.java +++ b/minicluster/src/main/java/alluxio/master/LocalAlluxioMaster.java @@ -151,7 +151,7 @@ public void run() { * @return true if the master is serving, false otherwise */ public boolean isServing() { - return mMasterProcess.isGrpcServing(); + return mMasterProcess.isGrpcServingAsLeader(); } /** diff --git a/tests/src/test/java/alluxio/client/cli/JournalToolTest.java b/tests/src/test/java/alluxio/client/cli/JournalToolTest.java index c45579013299..d0f7e776e76c 100644 --- a/tests/src/test/java/alluxio/client/cli/JournalToolTest.java +++ b/tests/src/test/java/alluxio/client/cli/JournalToolTest.java @@ -205,14 +205,11 @@ public void dumpHeapCheckpointFromEmbeddedJournal() throws Throwable { new String[] {"-inputDir", leaderJournalDir, "-outputDir", mDumpDir.getAbsolutePath()}); // Find the main checkpoint dir. 
String checkpointDir = findCheckpointDir(); - // Embedded journal checkpoints are grouped by masters. - String fsMasterCheckpointsDir = PathUtils.concatPath(checkpointDir, "FILE_SYSTEM_MASTER"); - assertNonemptyFileExists( - PathUtils.concatPath(fsMasterCheckpointsDir, "INODE_DIRECTORY_ID_GENERATOR")); for (String subPath : Arrays.asList("HEAP_INODE_STORE", "INODE_COUNTER", - "PINNED_INODE_FILE_IDS", "REPLICATION_LIMITED_FILE_IDS", "TO_BE_PERSISTED_FILE_IDS")) { - assertNonemptyFileExists(PathUtils.concatPath(fsMasterCheckpointsDir, "INODE_TREE", subPath)); + "PINNED_INODE_FILE_IDS", "REPLICATION_LIMITED_FILE_IDS", "TO_BE_PERSISTED_FILE_IDS", + "INODE_DIRECTORY_ID_GENERATOR")) { + assertNonemptyFileExists(PathUtils.concatPath(checkpointDir, subPath)); } } diff --git a/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java index 1b620e2537ad..d9aa14700b79 100644 --- a/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java @@ -17,12 +17,14 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.grpc.RegisterWorkerPOptions; +import alluxio.master.AlwaysPrimaryPrimarySelector; import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; import alluxio.master.block.BlockMaster; import alluxio.master.block.DefaultBlockMaster; import alluxio.master.block.meta.MasterWorkerInfo; +import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; import alluxio.master.metrics.MetricsMasterFactory; import alluxio.util.executor.ExecutorServiceFactories; @@ -55,7 +57,8 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_WORKER_TIMEOUT_MS, MASTER_WORKER_TIMEOUT_MS); mRegistry = new 
MasterRegistry(); - CoreMasterContext masterContext = MasterTestUtils.testMasterContext(); + CoreMasterContext masterContext = MasterTestUtils.testMasterContext(new NoopJournalSystem(), + null, new AlwaysPrimaryPrimarySelector()); MetricsMaster metricsMaster = new MetricsMasterFactory().create(mRegistry, masterContext); mRegistry.add(MetricsMaster.class, metricsMaster); mClock = new ManualClock(); diff --git a/tests/src/test/java/alluxio/server/ft/journal/MultiProcessCheckpointTest.java b/tests/src/test/java/alluxio/server/ft/journal/MultiProcessCheckpointTest.java index 11e0b72276be..31c9285188c6 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/MultiProcessCheckpointTest.java +++ b/tests/src/test/java/alluxio/server/ft/journal/MultiProcessCheckpointTest.java @@ -55,7 +55,7 @@ void runTest(int compressionLevel, boolean parallelCompression) throws Exception .addProperty(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, 100) .addProperty(PropertyKey.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX, "500") .addProperty(PropertyKey.MASTER_JOURNAL_TAILER_SHUTDOWN_QUIET_WAIT_TIME_MS, "500") - .addProperty(PropertyKey.MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_LEVEL, + .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL, compressionLevel) .addProperty(PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP, parallelCompression) diff --git a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java index 224484451047..86c5c369a834 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java @@ -21,10 +21,13 @@ import alluxio.conf.PropertyKey; import alluxio.exception.FileAlreadyExistsException; import 
alluxio.exception.FileDoesNotExistException; +import alluxio.grpc.MetricValue; import alluxio.grpc.MountPOptions; import alluxio.master.journal.JournalType; import alluxio.master.journal.raft.RaftJournalSystem; import alluxio.master.journal.raft.RaftJournalUtils; +import alluxio.master.journal.raft.SnapshotDirStateMachineStorage; +import alluxio.metrics.MetricKey; import alluxio.multi.process.MultiProcessCluster; import alluxio.multi.process.PortCoordination; import alluxio.util.CommonUtils; @@ -38,7 +41,6 @@ import org.apache.ratis.server.storage.StorageImplUtils; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; -import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -51,6 +53,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -231,8 +234,8 @@ private void waitForSnapshot(File raftDir) throws InterruptedException, TimeoutE File snapshotDir = new File(raftDir, "sm"); final int RETRY_INTERVAL_MS = 200; // milliseconds CommonUtils.waitFor("snapshot is downloaded", () -> { - File[] files = snapshotDir.listFiles(); - return files != null && files.length > 1 && files[0].length() > 0; + String[] files = snapshotDir.list(); + return files != null && files.length > 0 && files[0].length() > 0; }, WaitForOptions.defaults().setInterval(RETRY_INTERVAL_MS).setTimeoutMs(RESTART_TIMEOUT_MS)); } @@ -251,27 +254,24 @@ public void snapshotTransferLoad() throws Exception { .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "750ms") .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "1500ms") .addProperty(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, snapshotPeriod) + 
.addProperty(PropertyKey.MASTER_JOURNAL_REQUEST_INFO_TIMEOUT, "50ms") .build(); mCluster.start(); // this operation creates more that numFiles log entries for (int i = 0; i < numFile; i++) { - mCluster.getFileSystemClient().createFile(new AlluxioURI(String.format("/%d", i))); + mCluster.getFileSystemClient().createFile(new AlluxioURI(String.format("/%d", i))).close(); } - // only the latest 3 snapshots are kept, but each snapshot leaves behind a small md5 file. - // this checks to make sure there are enough md5 files, meaning many snapshots were propagated. - for (int i = 0; i < NUM_MASTERS; i++) { - File journalDir = new File(mCluster.getJournalDir(i)); - Path raftDir = Paths.get(RaftJournalUtils.getRaftJournalDir(journalDir).toString(), - RaftJournalSystem.RAFT_GROUP_UUID.toString()); - try (Stream stream = Files.walk(raftDir, Integer.MAX_VALUE)) { - long count = stream.filter(path -> path.toString().endsWith(".md5")).count(); - long expected = numFile / snapshotPeriod * 3 / 2; - Assert.assertTrue(String.format("Expected at least %d snapshots, got %d", expected, - count), count >= expected); - } - } + Map metrics = mCluster.getMetricsMasterClient().getMetrics(); + assertTrue(metrics.containsKey( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName())); + MetricValue metricValue = metrics.get( + MetricKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_DOWNLOAD_TIMER.getName()); + long count = (long) metricValue.getDoubleValue(); + long expected = numFile / snapshotPeriod * 3 / 2; + assertTrue(String.format("Expected at least %d snapshots, got %d", expected, count), + count >= expected); mCluster.notifySuccess(); } @@ -310,8 +310,9 @@ public void singleMasterSnapshotPurgeLogFiles() throws Exception { private void expectSnapshots(Path raftDir, int numExpected) throws Exception { try (Stream stream = Files.walk(raftDir, Integer.MAX_VALUE)) { - long countSnapshots = stream.filter(path -> path.toString().endsWith(".md5")).count(); - assertEquals("Expected " + 
numExpected + " snapshot(s) to be taken", numExpected, + long countSnapshots = stream + .filter(path -> SnapshotDirStateMachineStorage.matchSnapshotPath(path).matches()).count(); + assertEquals("Expected " + numExpected + " snapshot(s) to be taken", numExpected, countSnapshots); } } diff --git a/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java index d6873b63f72f..81b57ad31314 100644 --- a/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java +++ b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java @@ -80,6 +80,7 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_WRITE_TIMEOUT, "10sec"); Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "3s"); Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "6s"); + Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); mCluster.start(); From 5f170426b6de9926134b55bd77b619d0e5fe536b Mon Sep 17 00:00:00 2001 From: secfree Date: Wed, 12 Apr 2023 10:47:06 +0800 Subject: [PATCH 229/334] Skip ssh for localhost ### What changes are proposed in this pull request? Skip ssh connection while running locally. ### Why are the changes needed? I was trying to setup a one-node cluster with the latest version to do some simple tests. However, I encountered the following exception while executing `/bin/alluxio format` ``` $ ./bin/alluxio format Executing the following command on all worker nodes and logging to /home/test/deploy/alluxio/logs/task.log: /home/test/deploy/alluxio/bin/alluxio formatWorker Waiting for tasks to finish... test@localhost's password: test@localhost's password: test@localhost's password: Task on 'localhost' fails, exit code: 255 There are task failures, look at /home/test/deploy/alluxio/logs/task.log for details. 
``` In the log, it has ``` [2023-03-24 17:12:20][localhost] Failed to add the host to the list of known hosts (/home/test/.ssh/known_hosts). [2023-03-24 17:13:28][localhost] Permission denied, please try again. [2023-03-24 17:13:29][localhost] Permission denied, please try again. [2023-03-24 17:13:30][localhost] Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password). ``` However, in our cluster, we do not know the password of our accounts in a specific server. So I feel it's better to skip the "ssh" part for a local one-node cluster. ### Does this PR introduce any user facing changes? NO. pr-link: Alluxio/alluxio#17167 change-id: cid-0ade98d9517f269668a2f3cb384b8b079fa440be --- bin/alluxio-common.sh | 10 ++++++++++ bin/alluxio-masters.sh | 11 +++-------- bin/alluxio-workers.sh | 9 ++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/bin/alluxio-common.sh b/bin/alluxio-common.sh index 69023a43482f..d0b8f7926e33 100755 --- a/bin/alluxio-common.sh +++ b/bin/alluxio-common.sh @@ -68,3 +68,13 @@ function get_ramdisk_array() { done IFS=$oldifs } + +# Compose the ssh command according to the hostname +function ssh_command() { + local host=$1 + local command="" + if [[ $host != "localhost" && $host != "127.0.0.1" ]]; then + command="ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${host}" + fi + echo "${command}" +} diff --git a/bin/alluxio-masters.sh b/bin/alluxio-masters.sh index 359c3daea348..e72cc693c820 100755 --- a/bin/alluxio-masters.sh +++ b/bin/alluxio-masters.sh @@ -12,12 +12,7 @@ set -o pipefail -LAUNCHER= -# If debugging is enabled propagate that through to sub-shells -if [[ "$-" == *x* ]]; then - LAUNCHER="bash -x" -fi -BIN=$(cd "$( dirname "$( readlink "$0" || echo "$0" )" )"; pwd) +. $(dirname "$0")/alluxio-common.sh USAGE="Usage: alluxio-masters.sh command..." @@ -46,10 +41,10 @@ fi for master in ${HOSTLIST[@]}; do echo "[${master}] Connecting as ${USER}..." 
>> ${ALLUXIO_TASK_LOG} if [[ ${HA_ENABLED} == "true" || ${N} -eq 0 ]]; then - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${master} ${LAUNCHER} \ + nohup $(ssh_command ${master}) ${LAUNCHER} \ $"${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${master}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & else - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${master} ${LAUNCHER} \ + nohup $(ssh_command ${master}) ${LAUNCHER} \ $"export ALLUXIO_MASTER_SECONDARY=true; ${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${master}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & fi pids[${#pids[@]}]=$! diff --git a/bin/alluxio-workers.sh b/bin/alluxio-workers.sh index 11dc9c9558ba..79b792706db6 100755 --- a/bin/alluxio-workers.sh +++ b/bin/alluxio-workers.sh @@ -12,12 +12,7 @@ set -o pipefail -LAUNCHER= -# If debugging is enabled propagate that through to sub-shells -if [[ "$-" == *x* ]]; then - LAUNCHER="bash -x" -fi -BIN=$(cd "$( dirname "$( readlink "$0" || echo "$0" )" )"; pwd) +. $(dirname "$0")/alluxio-common.sh USAGE="Usage: alluxio-workers.sh command..." @@ -39,7 +34,7 @@ echo "Executing the following command on all worker nodes and logging to ${ALLUX for worker in ${HOSTLIST[@]}; do echo "[${worker}] Connecting as ${USER}..." >> ${ALLUXIO_TASK_LOG} - nohup ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -tt ${worker} ${LAUNCHER} \ + nohup $(ssh_command ${worker}) ${LAUNCHER} \ $"${@// /\\ }" 2>&1 | while read line; do echo "[$(date '+%F %T')][${worker}] ${line}"; done >> ${ALLUXIO_TASK_LOG} & pids[${#pids[@]}]=$! 
done From 075e4e4f9385712d4c00d920180e5897c6796b2b Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Thu, 13 Apr 2023 17:32:58 -0700 Subject: [PATCH 230/334] Prevent unrelated property keys from generating in docGen pr-link: Alluxio/alluxio#17249 change-id: cid-961a1e2cf4862e04be59cfdf6385ae3e2eb9a90d --- .../java/alluxio/cli/docgen/ConfigurationDocGenerator.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java b/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java index 79e7b02ac615..0413657134f6 100644 --- a/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java +++ b/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java @@ -105,8 +105,12 @@ public static void writeCSVFile(Collection defaultKeys, S fileWriter = fileWriterMap.get("security"); } else if (pKey.startsWith("alluxio.integration")) { fileWriter = fileWriterMap.get("cluster-management"); - } else { + } else if (pKey.startsWith("alluxio.") || pKey.startsWith("fs.") + || pKey.startsWith("s3a.")) { fileWriter = fileWriterMap.get("common"); + } else { + // skip configuration properties unrelated to Alluxio + continue; } fileWriter.append(keyValueStr); } From fd19fb60cadf2d941f5b36d11f128563e511d50d Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 14 Apr 2023 09:57:23 +0800 Subject: [PATCH 231/334] Support cron timer to arrange the period heartbeat executor invoke time Support cron timer to arrange the period heartbeat executor invoke time. Example ``` cron timer config: * 0-10,20-30,40-56 12-13 * * ? * The heartbeat executor invoke at every minute from 0 through 10, from 20 through 30, from 40 through 56, at past every hour from 12 through 13. 
``` pr-link: Alluxio/alluxio#16900 change-id: cid-9277f30e2159e64863067d14cbcbee526707c5b6 --- .../alluxio/client/file/ConfigHashSync.java | 2 +- .../file/FileSystemContextReinitializer.java | 2 +- .../CronExpressionIntervalSupplier.java | 59 +++++++++ .../heartbeat/FixedIntervalSupplier.java | 63 +++++++++ .../alluxio/heartbeat/HeartbeatExecutor.java | 6 +- .../alluxio/heartbeat/HeartbeatThread.java | 74 ++++++----- .../alluxio/heartbeat/HeartbeatTimer.java | 26 ++-- .../alluxio/heartbeat/ScheduledTimer.java | 11 +- .../heartbeat/SleepIntervalSupplier.java | 34 +++++ .../java/alluxio/heartbeat/SleepingTimer.java | 66 +++++----- .../heartbeat/HeartbeatContextTest.java | 2 +- .../heartbeat/HeartbeatThreadTest.java | 5 +- ...ForCronExpressionIntervalSupplierTest.java | 121 ++++++++++++++++++ .../alluxio/heartbeat/SleepingTimerTest.java | 9 +- .../master/block/DefaultBlockMaster.java | 11 +- .../master/block/meta/MasterWorkerInfo.java | 2 +- .../master/file/BlockIntegrityChecker.java | 2 +- .../master/file/DefaultFileSystemMaster.java | 31 +++-- .../alluxio/master/file/InodeTtlChecker.java | 2 +- .../alluxio/master/file/LostFileDetector.java | 2 +- .../java/alluxio/master/file/UfsCleaner.java | 2 +- .../file/activesync/ActiveSyncManager.java | 4 +- .../master/file/activesync/ActiveSyncer.java | 2 +- .../file/replication/ReplicationChecker.java | 2 +- .../master/meta/DefaultMetaMaster.java | 21 +-- .../master/meta/JournalSpaceMonitor.java | 2 +- .../alluxio/master/meta/MetaMasterSync.java | 2 +- .../alluxio/master/meta/UpdateChecker.java | 2 +- .../master/metrics/DefaultMetricsMaster.java | 6 +- .../throttle/DefaultThrottleMaster.java | 6 +- .../replication/ReplicationCheckerTest.java | 36 +++--- .../master/meta/JournalSpaceMonitorTest.java | 4 +- .../alluxio/worker/block/BlockMasterSync.java | 2 +- .../worker/block/BlockSyncMasterGroup.java | 4 +- .../worker/block/DefaultBlockWorker.java | 12 +- .../alluxio/worker/block/PinListSync.java | 2 +- 
.../worker/block/SpecificMasterBlockSync.java | 2 +- .../alluxio/worker/block/PinListSyncTest.java | 4 +- .../block/SpecificMasterBlockSyncTest.java | 12 +- .../main/java/alluxio/fuse/AlluxioFuse.java | 3 +- .../java/alluxio/fuse/meta/UpdateChecker.java | 2 +- .../java/alluxio/master/job/JobMaster.java | 6 +- .../main/java/alluxio/worker/JobWorker.java | 4 +- .../job/command/CommandHandlingExecutor.java | 2 +- .../command/CommandHandlingExecutorTest.java | 2 +- .../table/transform/TransformManager.java | 6 +- ...MasterDeleteLostWorkerIntegrationTest.java | 4 +- ...ileSystemContextReinitIntegrationTest.java | 4 +- ...ckMasterRegisterStreamIntegrationTest.java | 2 +- ...ckWorkerRegisterStreamIntegrationTest.java | 2 +- 50 files changed, 506 insertions(+), 188 deletions(-) create mode 100644 core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java create mode 100644 core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java create mode 100644 core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java create mode 100644 core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java b/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java index 144be4e7f6f1..b94ffd6d0651 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ConfigHashSync.java @@ -71,7 +71,7 @@ public Optional getException() { } @Override - public synchronized void heartbeat() { + public synchronized void heartbeat(long timeLimitMs) { if (!mContext.getClientContext().getClusterConf().clusterDefaultsLoaded()) { // Wait until the initial cluster defaults are loaded. 
return; diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java index 78ae526be8e6..ae7e9049e95c 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContextReinitializer.java @@ -66,7 +66,7 @@ public FileSystemContextReinitializer(FileSystemContext context) { mExecutor = new ConfigHashSync(context); mFuture = REINIT_EXECUTOR.scheduleAtFixedRate(() -> { try { - mExecutor.heartbeat(); + mExecutor.heartbeat(Long.MAX_VALUE); } catch (Exception e) { LOG.error("Uncaught exception in config heartbeat executor, shutting down", e); } diff --git a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java new file mode 100644 index 000000000000..e632e472dac6 --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.heartbeat; + +import org.apache.logging.log4j.core.util.CronExpression; + +import java.time.Duration; +import java.time.Instant; +import java.util.Date; + +/** +* Calculate the next interval by given cron expression. 
+*/ +public class CronExpressionIntervalSupplier implements SleepIntervalSupplier { + private final long mInterval; + private final CronExpression mCron; + + /** + * Constructs a new {@link CronExpressionIntervalSupplier}. + * + * @param cronExpression the cron expression + * @param fixedInterval the fixed interval + */ + public CronExpressionIntervalSupplier(CronExpression cronExpression, long fixedInterval) { + mInterval = fixedInterval; + mCron = cronExpression; + } + + @Override + public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { + long nextInterval = 0; + long executionTimeMs = nowTimeStampMillis - mPreviousTickedMs; + if (executionTimeMs < mInterval) { + nextInterval = mInterval - executionTimeMs; + } + Date now = Date.from(Instant.ofEpochMilli(nowTimeStampMillis + nextInterval)); + if (mCron.isSatisfiedBy(now)) { + return nextInterval; + } + return nextInterval + Duration.between( + now.toInstant(), mCron.getNextValidTimeAfter(now).toInstant()).toMillis(); + } + + @Override + public long getRunLimit(long mPreviousTickedMs) { + Date now = Date.from(Instant.ofEpochMilli(mPreviousTickedMs)); + return Duration.between(now.toInstant(), + mCron.getNextInvalidTimeAfter(now).toInstant()).toMillis(); + } +} diff --git a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java new file mode 100644 index 000000000000..1269f5996112 --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.heartbeat; + +import org.slf4j.Logger; +import org.slf4j.helpers.NOPLogger; + +/** + * Fixed interval supplier. + */ +public class FixedIntervalSupplier implements SleepIntervalSupplier { + + private final long mInterval; + protected final Logger mLogger; + + /** + * Constructs a new {@link FixedIntervalSupplier}. + * + * @param fixedInterval the fixed interval + * @param logger the logger + */ + public FixedIntervalSupplier(long fixedInterval, Logger logger) { + mInterval = fixedInterval; + mLogger = logger; + } + + /** + * Constructs a new {@link FixedIntervalSupplier}. + * + * @param fixedInterval the fixed interval + */ + public FixedIntervalSupplier(long fixedInterval) { + this(fixedInterval, NOPLogger.NOP_LOGGER); + } + + @Override + public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { + if (mPreviousTickedMs == -1) { + return -1; + } + long executionTimeMs = nowTimeStampMillis - mPreviousTickedMs; + if (executionTimeMs > mInterval) { + mLogger.warn("{} last execution took {} ms. Longer than the interval {}", + Thread.currentThread().getName(), executionTimeMs, mInterval); + return 0; + } + return mInterval - executionTimeMs; + } + + @Override + public long getRunLimit(long mPreviousTickedMs) { + return mInterval; + } +} diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java index a10c4662c5c5..2b8e96ec7532 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatExecutor.java @@ -15,15 +15,17 @@ /** * An interface for a heartbeat execution. The {@link HeartbeatThread} calls the - * {@link #heartbeat()} method. + * {@link #heartbeat(long)} method. 
*/ public interface HeartbeatExecutor extends Closeable { + /** * Implements the heartbeat logic. * + * @param timeLimitMs time limit in milliseconds this heartbeat should not exceed when running * @throws InterruptedException if the thread is interrupted */ - void heartbeat() throws InterruptedException; + void heartbeat(long timeLimitMs) throws InterruptedException; /** * Cleans up any resources used by the heartbeat executor. diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java index 2bb891d67c19..cc9b200bfe5f 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java @@ -12,7 +12,6 @@ package alluxio.heartbeat; import alluxio.conf.AlluxioConfiguration; -import alluxio.conf.Reconfigurable; import alluxio.conf.ReconfigurableRegistry; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.UserState; @@ -21,11 +20,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.base.Supplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.time.Clock; +import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; /** @@ -33,13 +33,12 @@ * the JVM from exiting. 
*/ @NotThreadSafe -public final class HeartbeatThread implements Runnable, Reconfigurable { +public final class HeartbeatThread implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(HeartbeatThread.class); private final String mThreadName; private final HeartbeatExecutor mExecutor; private final UserState mUserState; - private final Supplier mIntervalSupplier; private HeartbeatTimer mTimer; private AlluxioConfiguration mConfiguration; private Status mStatus; @@ -73,26 +72,28 @@ public static String generateThreadName(String executorName, String threadId) { * @param intervalSupplier Sleep time between different heartbeat supplier * @param conf Alluxio configuration * @param userState the user state for this heartbeat thread + * @param clock the clock used to compute the current time */ public HeartbeatThread(String executorName, String threadId, HeartbeatExecutor executor, - Supplier intervalSupplier, AlluxioConfiguration conf, UserState userState) { + Supplier intervalSupplier, + AlluxioConfiguration conf, UserState userState, Clock clock) { mThreadName = generateThreadName(executorName, threadId); mExecutor = Preconditions.checkNotNull(executor, "executor"); Class timerClass = HeartbeatContext.getTimerClass(executorName); - mTimer = CommonUtils.createNewClassInstance(timerClass, new Class[] {String.class, long.class}, - new Object[] {mThreadName, intervalSupplier.get()}); + mTimer = CommonUtils.createNewClassInstance(timerClass, + new Class[] {String.class, Clock.class, Supplier.class}, + new Object[] {mThreadName, clock, intervalSupplier}); mConfiguration = conf; mUserState = userState; - mIntervalSupplier = intervalSupplier; mStatus = Status.INIT; - ReconfigurableRegistry.register(this); + ReconfigurableRegistry.register(mTimer); } /** * Convenience method for * {@link * #HeartbeatThread(String, String, HeartbeatExecutor, Supplier, AlluxioConfiguration, - * UserState)} where threadId is null. 
+ * UserState, Clock)} where threadId is null. * * @param executorName the executor name that is one of those defined in {@link HeartbeatContext} * @param executor the heartbeat executor @@ -101,12 +102,34 @@ public HeartbeatThread(String executorName, String threadId, HeartbeatExecutor e * @param userState the user state for this heartbeat thread */ public HeartbeatThread(String executorName, HeartbeatExecutor executor, - Supplier intervalSupplier, AlluxioConfiguration conf, UserState userState) { - this(executorName, null, executor, intervalSupplier, conf, userState); + Supplier intervalSupplier, AlluxioConfiguration conf, + UserState userState) { + this(executorName, null, executor, intervalSupplier, conf, userState, Clock.systemUTC()); + } + + /** + * Convenience method for + * {@link + * #HeartbeatThread(String, String, HeartbeatExecutor, Supplier, AlluxioConfiguration, + * UserState, Clock)} where threadId is null. + * + * @param executorName the executor name that is one of those defined in {@link HeartbeatContext} + * @param executor the heartbeat executor + * @param intervalSupplier the interval between heartbeats supplier + * @param conf the Alluxio configuration + * @param userState the user state for this heartbeat thread + * @param clock the clock used to compute the current time + */ + public HeartbeatThread(String executorName, HeartbeatExecutor executor, + Supplier intervalSupplier, + AlluxioConfiguration conf, UserState userState, Clock clock) { + this(executorName, null, executor, intervalSupplier, + conf, userState, clock); } @Override public void run() { + long counter = 0L; try { if (SecurityUtils.isSecurityEnabled(mConfiguration) && AuthenticatedClientUser.get(mConfiguration) == null) { @@ -123,9 +146,10 @@ public void run() { while (!Thread.interrupted()) { // TODO(peis): Fix this. The current implementation consumes one thread even when ticking. 
mStatus = Status.WAITING; - mTimer.tick(); + long limitTime = mTimer.tick(); mStatus = Status.RUNNING; - mExecutor.heartbeat(); + LOG.debug("{} #{} will run limited in {}s", mThreadName, counter++, limitTime / 1000); + mExecutor.heartbeat(limitTime); } } catch (InterruptedException e) { // Allow thread to exit. @@ -133,19 +157,11 @@ public void run() { LOG.error("Uncaught exception in heartbeat executor, Heartbeat Thread shutting down", e); } finally { mStatus = Status.STOPPED; + ReconfigurableRegistry.unregister(mTimer); mExecutor.close(); } } - /** - * Updates the heartbeat interval. - * - * @param intervalMs the heartbeat interval in ms - */ - public void updateIntervalMs(long intervalMs) { - mTimer.setIntervalMs(intervalMs); - } - /** * @return the status of current heartbeat thread */ @@ -153,18 +169,6 @@ public Status getStatus() { return mStatus; } - @Override - public void update() { - if (mStatus == Status.STOPPED) { - ReconfigurableRegistry.unregister(this); - return; - } - long interval = mIntervalSupplier.get(); - if (interval != mTimer.getIntervalMs()) { - updateIntervalMs(interval); - } - } - /** * Enum representing the status of HeartbeatThread. */ diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java index 96e9618af3ea..736037234edd 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java @@ -11,33 +11,27 @@ package alluxio.heartbeat; +import alluxio.conf.Reconfigurable; + /** * An interface for heartbeat timers. The {@link HeartbeatThread} calls the {@link #tick()} method. */ -public interface HeartbeatTimer { +public interface HeartbeatTimer extends Reconfigurable { /** - * Sets the heartbeat interval. 
- * - * @param intervalMs the heartbeat interval in ms - */ - default void setIntervalMs(long intervalMs) { - throw new UnsupportedOperationException("Setting interval is not supported"); - } - - /** - * Get the interval of HeartbeatTimer. - * - * @return the interval of this HeartbeatTimer + * When this object needs to be reconfigured + * due to external configuration change etc., + * this function will be invoked. */ - default long getIntervalMs() { - throw new UnsupportedOperationException("Getting interval is not supported"); + default void update() { } /** * Waits until next heartbeat should be executed. * + * @return time limit in milliseconds for this heartbeat action to run for before + * the next heartbeat is due. * @throws InterruptedException if the thread is interrupted while waiting */ - void tick() throws InterruptedException; + long tick() throws InterruptedException; } diff --git a/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java b/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java index 62b6d5667d83..cff75372105c 100644 --- a/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/ScheduledTimer.java @@ -15,9 +15,11 @@ import com.google.common.base.Preconditions; +import java.time.Clock; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Supplier; import javax.annotation.concurrent.ThreadSafe; /** @@ -46,9 +48,11 @@ public final class ScheduledTimer implements HeartbeatTimer { * Creates a new instance of {@link ScheduledTimer}. 
* * @param threadName the thread name - * @param intervalMs the heartbeat interval (unused) + * @param clock for telling the current time (unused) + * @param intervalSupplierSupplier Sleep time between different heartbeat supplier */ - public ScheduledTimer(String threadName, long intervalMs) { + public ScheduledTimer(String threadName, Clock clock, + Supplier intervalSupplierSupplier) { mThreadName = threadName; mLock = new ReentrantLock(); mTickCondition = mLock.newCondition(); @@ -77,7 +81,7 @@ protected void schedule() { } @Override - public void tick() throws InterruptedException { + public long tick() throws InterruptedException { try (LockResource r = new LockResource(mLock)) { HeartbeatScheduler.addTimer(this); // Wait in a loop to handle spurious wakeups @@ -87,5 +91,6 @@ public void tick() throws InterruptedException { mScheduled = false; } + return Long.MAX_VALUE; } } diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java new file mode 100644 index 000000000000..cde2ddd5ff3f --- /dev/null +++ b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java @@ -0,0 +1,34 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.heartbeat; + +/** + * A policy to calculate the next interval to sleep. + */ +public interface SleepIntervalSupplier { + /** + * Gets the next interval for sleeping. 
+ * + * @param mPreviousTickedMs previous ticked time stamp in millisecond + * @param nowTimeStampMillis current time stamp in millisecond + * @return the interval to sleep starting from now before next time the timer triggers + */ + long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis); + + /** + * Gets the run limit from previous ticked. + * + * @param mPreviousTickedMs previous ticked time stamp in millisecond + * @return the run limit + */ + long getRunLimit(long mPreviousTickedMs); +} diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index d6d4ad2589ab..2e444de5b892 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -11,7 +11,6 @@ package alluxio.heartbeat; -import alluxio.clock.SystemClock; import alluxio.time.Sleeper; import alluxio.time.ThreadSleeper; @@ -20,57 +19,52 @@ import java.time.Clock; import java.time.Duration; +import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; /** * This class can be used for executing heartbeats periodically. */ @NotThreadSafe -public final class SleepingTimer implements HeartbeatTimer { - private long mIntervalMs; - private long mPreviousTickMs; +public class SleepingTimer implements HeartbeatTimer { + protected long mPreviousTickedMs = -1; private final String mThreadName; - private final Logger mLogger; - private final Clock mClock; - private final Sleeper mSleeper; + protected final Logger mLogger; + protected final Clock mClock; + protected final Sleeper mSleeper; + protected final Supplier mIntervalSupplierSupplier; + protected SleepIntervalSupplier mIntervalSupplier; /** * Creates a new instance of {@link SleepingTimer}. 
* * @param threadName the thread name - * @param intervalMs the heartbeat interval + * @param clock for telling the current time + * @param intervalSupplierSupplier Sleep time between different heartbeat supplier */ - public SleepingTimer(String threadName, long intervalMs) { - this(threadName, intervalMs, LoggerFactory.getLogger(SleepingTimer.class), - new SystemClock(), ThreadSleeper.INSTANCE); + public SleepingTimer(String threadName, Clock clock, + Supplier intervalSupplierSupplier) { + this(threadName, LoggerFactory.getLogger(SleepingTimer.class), + clock, ThreadSleeper.INSTANCE, intervalSupplierSupplier); } /** * Creates a new instance of {@link SleepingTimer}. * * @param threadName the thread name - * @param intervalMs the heartbeat interval * @param logger the logger to log to * @param clock for telling the current time * @param sleeper the utility to use for sleeping + * @param intervalSupplierSupplier Sleep time between different heartbeat supplier */ - public SleepingTimer(String threadName, long intervalMs, Logger logger, Clock clock, - Sleeper sleeper) { - mIntervalMs = intervalMs; + public SleepingTimer(String threadName, Logger logger, Clock clock, Sleeper sleeper, + Supplier intervalSupplierSupplier) { mThreadName = threadName; mLogger = logger; mClock = clock; mSleeper = sleeper; - } - - @Override - public void setIntervalMs(long intervalMs) { - mIntervalMs = intervalMs; - } - - @Override - public long getIntervalMs() { - return mIntervalMs; + mIntervalSupplierSupplier = intervalSupplierSupplier; + mIntervalSupplier = intervalSupplierSupplier.get(); } /** @@ -79,16 +73,18 @@ public long getIntervalMs() { * @throws InterruptedException if the thread is interrupted while waiting */ @Override - public void tick() throws InterruptedException { - if (mPreviousTickMs != 0) { - long executionTimeMs = mClock.millis() - mPreviousTickMs; - if (executionTimeMs > mIntervalMs) { - mLogger.warn("{} last execution took {} ms. 
Longer than the interval {}", mThreadName, - executionTimeMs, mIntervalMs); - } else { - mSleeper.sleep(Duration.ofMillis(mIntervalMs - executionTimeMs)); - } + public long tick() throws InterruptedException { + long nextInterval = mIntervalSupplier.getNextInterval(mPreviousTickedMs, mClock.millis()); + if (nextInterval > 0) { + mSleeper.sleep(Duration.ofMillis(nextInterval)); } - mPreviousTickMs = mClock.millis(); + mPreviousTickedMs = mClock.millis(); + return mIntervalSupplier.getRunLimit(mPreviousTickedMs); + } + + @Override + public void update() { + mIntervalSupplier = mIntervalSupplierSupplier.get(); + mLogger.info("update {} interval supplier.", mThreadName); } } diff --git a/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java b/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java index f5c222739dc0..0c972baf44db 100644 --- a/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/HeartbeatContextTest.java @@ -21,7 +21,7 @@ */ public final class HeartbeatContextTest { @Test - public void allThreadsUseSleepingTimer() { + public void allThreadsUseProductionTimer() { for (String threadName : HeartbeatContext.getTimerClasses().keySet()) { Class timerClass = HeartbeatContext.getTimerClass(threadName); assertTrue(timerClass.isAssignableFrom(SleepingTimer.class)); diff --git a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java index 5d09135dc7ea..921e250984da 100644 --- a/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/HeartbeatThreadTest.java @@ -139,7 +139,8 @@ public Void call() throws Exception { try (ManuallyScheduleHeartbeat.Resource r = new ManuallyScheduleHeartbeat.Resource(Arrays.asList(mThreadName))) { DummyHeartbeatExecutor executor = new DummyHeartbeatExecutor(); - HeartbeatThread ht = new 
HeartbeatThread(mThreadName, executor, () -> 1L, + HeartbeatThread ht = new HeartbeatThread(mThreadName, executor, + () -> new FixedIntervalSupplier(1L), Configuration.global(), UserState.Factory.create(Configuration.global())); // Run the HeartbeatThread. @@ -166,7 +167,7 @@ private class DummyHeartbeatExecutor implements HeartbeatExecutor { private int mCounter = 0; @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { mCounter++; } diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java new file mode 100644 index 000000000000..81d9d5e4bc06 --- /dev/null +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java @@ -0,0 +1,121 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.heartbeat; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; + +import alluxio.Constants; +import alluxio.clock.ManualClock; +import alluxio.time.Sleeper; + +import org.apache.logging.log4j.core.util.CronExpression; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; + +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.Duration; +import java.util.Date; + +/** + * Unit tests for {@link SleepingTimer}. + */ +public final class SleepingTimerForCronExpressionIntervalSupplierTest { + private static final String THREAD_NAME = "cron-test-thread-name"; + private static final long INTERVAL_MS = 10 * Constants.MINUTE_MS; + private Logger mMockLogger; + private ManualClock mFakeClock; + private Sleeper mMockSleeper; + private long mAllSleepTimeMs; + + @Before + public void before() throws InterruptedException { + mMockLogger = mock(Logger.class); + mFakeClock = new ManualClock(); + mMockSleeper = mock(Sleeper.class); + doAnswer((invocation) -> { + Duration duration = invocation.getArgument(0); + mFakeClock.addTime(duration); + mAllSleepTimeMs += duration.toMillis(); + return null; + }).when(mMockSleeper).sleep(any(Duration.class)); + } + + /** + * Tests that the cron timer will attempt to run at the same interval, independently of how + * long the execution between ticks takes. For example, if the interval is 100ms and execution + * takes 80ms, the timer should sleep for only 20ms to maintain the regular interval of 100ms. + */ + @Test + public void maintainInterval() throws Exception { + SleepingTimer timer = + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + () -> { + try { + return new CronExpressionIntervalSupplier( + new CronExpression("* 30-59 0-1,4-9,13-23 * * ? 
*"), INTERVAL_MS); + } catch (ParseException e) { + throw new RuntimeException(e); + } + }); + DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + Date startDate = formatter.parse("2022-01-01 00:00:00"); + Assert.assertEquals(-1, timer.mPreviousTickedMs); + mFakeClock.setTimeMs(startDate.getTime()); + long limitMs = timer.tick(); + long lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(30 * Constants.MINUTE_MS, mAllSleepTimeMs); + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:30:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 1 minute + mFakeClock.addTime(Duration.ofMinutes(1)); + + limitMs = timer.tick(); + Assert.assertEquals(9 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(20 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:40:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:40:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 5 minute + mFakeClock.addTime(Duration.ofMinutes(5)); + + limitMs = timer.tick(); + Assert.assertEquals(5 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(10 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 00:50:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 00:50:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 5 minute + mFakeClock.addTime(Duration.ofMinutes(5)); + + limitMs = timer.tick(); + Assert.assertEquals(35 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + lastAllSleepTimeMs = mAllSleepTimeMs; + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + 
Assert.assertEquals(formatter.parse("2022-01-01 01:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 01:30:00"), new Date(mFakeClock.millis())); + // Mock heartbeat 30 minute + mFakeClock.addTime(Duration.ofMinutes(30)); + + limitMs = timer.tick(); + Assert.assertEquals(150 * Constants.MINUTE_MS, mAllSleepTimeMs - lastAllSleepTimeMs); + Assert.assertEquals(30 * Constants.MINUTE_MS, limitMs); + Assert.assertEquals(formatter.parse("2022-01-01 04:30:00"), new Date(timer.mPreviousTickedMs)); + Assert.assertEquals(formatter.parse("2022-01-01 04:30:00"), new Date(mFakeClock.millis())); + } +} diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java index ae8ef03d8aea..6a4f79447574 100644 --- a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java @@ -47,7 +47,8 @@ public void before() { @Test public void warnWhenExecutionTakesLongerThanInterval() throws Exception { SleepingTimer timer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + () -> new FixedIntervalSupplier(INTERVAL_MS, mMockLogger)); timer.tick(); mFakeClock.addTimeMs(5 * INTERVAL_MS); @@ -60,7 +61,8 @@ public void warnWhenExecutionTakesLongerThanInterval() throws Exception { @Test public void sleepForSpecifiedInterval() throws Exception { final SleepingTimer timer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + () -> new FixedIntervalSupplier(INTERVAL_MS)); timer.tick(); // first tick won't sleep verify(mMockSleeper, times(0)).sleep(any(Duration.class)); timer.tick(); @@ -75,7 +77,8 @@ public void sleepForSpecifiedInterval() throws Exception { @Test public void 
maintainInterval() throws Exception { SleepingTimer stimer = - new SleepingTimer(THREAD_NAME, INTERVAL_MS, mMockLogger, mFakeClock, mMockSleeper); + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + () -> new FixedIntervalSupplier(INTERVAL_MS)); stimer.tick(); mFakeClock.addTimeMs(INTERVAL_MS / 3); diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 733dcb29fc3d..c5f740d1fbc7 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -41,6 +41,7 @@ import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -513,7 +514,7 @@ public class WorkerRegisterStreamGCExecutor implements HeartbeatExecutor { .getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT); @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { AtomicInteger removedSessions = new AtomicInteger(0); mActiveRegisterContexts.entrySet().removeIf((entry) -> { WorkerRegisterContext context = entry.getValue(); @@ -558,7 +559,8 @@ public void start(Boolean isLeader) throws IOException { if (isLeader || mWorkerRegisterToAllMasters) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), - () -> Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DETECTION_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } @@ -566,7 +568,8 @@ HeartbeatContext.MASTER_LOST_WORKER_DETECTION, new 
LostWorkerDetectionHeartbeatE getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_WORKER_REGISTER_SESSION_CLEANER, new WorkerRegisterStreamGCExecutor(), - () -> Configuration.getMs(PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT), + () -> new FixedIntervalSupplier(Configuration.getMs( + PropertyKey.MASTER_WORKER_REGISTER_STREAM_RESPONSE_TIMEOUT)), Configuration.global(), mMasterContext.getUserState())); } @@ -1759,7 +1762,7 @@ public final class LostWorkerDetectionHeartbeatExecutor implements HeartbeatExec public LostWorkerDetectionHeartbeatExecutor() {} @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { long masterWorkerTimeoutMs = Configuration.getMs(PropertyKey.MASTER_WORKER_TIMEOUT_MS); long masterWorkerDeleteTimeoutMs = Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_DELETION_TIMEOUT_MS); diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java index 8974ce548176..b2d08a66fcb3 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java +++ b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java @@ -111,7 +111,7 @@ * and block removal/commit. * 2. In {@link alluxio.master.block.WorkerRegisterContext}, * to write locks are held throughout the lifecycle. - * 3. In {@link DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor#heartbeat()} + * 3. 
In {@link DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor#heartbeat(long)} */ @NotThreadSafe public final class MasterWorkerInfo { diff --git a/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java b/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java index 24334a592eb0..46370d2f91f2 100644 --- a/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/BlockIntegrityChecker.java @@ -39,7 +39,7 @@ public BlockIntegrityChecker(FileSystemMaster fsm) { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { mFileSystemMaster.validateInodeBlocks(mRepair); } catch (Exception e) { diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 5e2eb91255dc..eaf8193f483a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -60,6 +60,7 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAttributePOptions; import alluxio.grpc.TtlAction; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.job.plan.persist.PersistConfig; @@ -717,30 +718,35 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_BLOCK_INTEGRITY_CHECK, new BlockIntegrityChecker(this), () -> - Configuration.getMs(PropertyKey.MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL), + new FixedIntervalSupplier(Configuration.getMs( + PropertyKey.MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } getExecutorService().submit( new 
HeartbeatThread(HeartbeatContext.MASTER_TTL_CHECK, new InodeTtlChecker(this, mInodeTree), - () -> Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_TTL_CHECKER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOST_FILES_DETECTION, new LostFileDetector(this, mBlockMaster, mInodeTree), - () -> Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOST_WORKER_FILE_DETECTION_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); mReplicationCheckHeartbeatThread = new HeartbeatThread( HeartbeatContext.MASTER_REPLICATION_CHECK, new alluxio.master.file.replication.ReplicationChecker(mInodeTree, mBlockMaster, mSafeModeManager, mJobMasterClientPool), - () -> Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_REPLICATION_CHECK_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState()); getExecutorService().submit(mReplicationCheckHeartbeatThread); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_SCHEDULER, new PersistenceScheduler(), - () -> Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_SCHEDULER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); mPersistCheckerPool = new java.util.concurrent.ThreadPoolExecutor(PERSIST_CHECKER_POOL_THREADS, @@ -751,12 +757,14 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_PERSISTENCE_CHECKER, new PersistenceChecker(), - () -> 
Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PERSISTENCE_CHECKER_INTERVAL_MS)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_METRICS_TIME_SERIES, new TimeSeriesRecorder(), - () -> Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); @@ -769,7 +777,8 @@ public void start(Boolean isPrimary) throws IOException { if (Configuration.getBoolean(PropertyKey.UNDERFS_CLEANUP_ENABLED)) { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_UFS_CLEANUP, new UfsCleaner(this), - () -> Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.UNDERFS_CLEANUP_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } if (mAccessTimeUpdater != null) { @@ -4564,7 +4573,7 @@ private void handleReady(long fileId, JournalContext journalContext, AtomicInteg * @throws InterruptedException if the thread is interrupted */ @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { LOG.debug("Async Persist heartbeat start"); java.util.concurrent.TimeUnit.SECONDS.sleep(mQuietPeriodSeconds); AtomicInteger journalCounter = new AtomicInteger(0); @@ -4867,7 +4876,7 @@ private void createParentPath(List inodes, String ufsPath, } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { boolean queueEmpty = 
mPersistCheckerPool.getQueue().isEmpty(); // Check the progress of persist jobs. for (long fileId : mPersistJobs.keySet()) { @@ -4955,7 +4964,7 @@ public void heartbeat() throws InterruptedException { @NotThreadSafe private final class TimeSeriesRecorder implements alluxio.heartbeat.HeartbeatExecutor { @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { // TODO(calvin): Provide a better way to keep track of metrics collected as time series MetricRegistry registry = MetricsSystem.METRIC_REGISTRY; SortedMap gauges = registry.getGauges(); diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java index 0c9cf4a76ab4..595322679c31 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/InodeTtlChecker.java @@ -61,7 +61,7 @@ public InodeTtlChecker(FileSystemMaster fileSystemMaster, InodeTree inodeTree) { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { Set expiredBuckets = mTtlBuckets.pollExpiredBuckets(System.currentTimeMillis()); Map failedInodesToRetryNum = new HashMap<>(); for (TtlBucket bucket : expiredBuckets) { diff --git a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java index 535bec900ec9..9f25b8d8a857 100644 --- a/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java +++ b/core/server/master/src/main/java/alluxio/master/file/LostFileDetector.java @@ -59,7 +59,7 @@ public LostFileDetector(FileSystemMaster fileSystemMaster, BlockMaster blockMast } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws 
InterruptedException { Iterator iter = mBlockMaster.getLostBlocksIterator(); Set toMarkFiles = new HashSet<>(); while (iter.hasNext()) { diff --git a/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java b/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java index bc9ab0ab6ef4..5d1261bff807 100644 --- a/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java +++ b/core/server/master/src/main/java/alluxio/master/file/UfsCleaner.java @@ -30,7 +30,7 @@ public UfsCleaner(FileSystemMaster fileSystemMaster) { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { mFileSystemMaster.cleanupUfs(); } diff --git a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java index 214c1ec72e67..6993b31027dd 100644 --- a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java +++ b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncManager.java @@ -17,6 +17,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.InvalidPathException; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.master.file.FileSystemMaster; @@ -262,7 +263,8 @@ public void launchPollingThread(long mountId, long txId) { ActiveSyncer syncer = new ActiveSyncer(mFileSystemMaster, this, mMountTable, mountId); Future future = getExecutor().submit( new HeartbeatThread(HeartbeatContext.MASTER_ACTIVE_UFS_SYNC, - syncer, () -> Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL), + syncer, () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_UFS_ACTIVE_SYNC_INTERVAL)), Configuration.global(), ServerUserState.global())); mPollerMap.put(mountId, future); } diff --git 
a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java index 666da9434682..e9ba8aebec3c 100644 --- a/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java +++ b/core/server/master/src/main/java/alluxio/master/file/activesync/ActiveSyncer.java @@ -74,7 +74,7 @@ public ActiveSyncer(FileSystemMaster fileSystemMaster, ActiveSyncManager syncMan } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { LOG.debug("start sync heartbeat for {} with mount id {}", mMountUri, mMountId); // Remove any previously completed sync tasks mSyncTasks.removeIf(Future::isDone); diff --git a/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java b/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java index 44e801dc29d9..d669f182bbdb 100644 --- a/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java +++ b/core/server/master/src/main/java/alluxio/master/file/replication/ReplicationChecker.java @@ -148,7 +148,7 @@ private boolean shouldRun() { * (2) Is there any blocks over replicated, schedule evict jobs to reduce the replication level. 
*/ @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { if (!shouldRun()) { return; } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 010e668acd5a..b1b8fe6bb393 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -35,6 +35,7 @@ import alluxio.grpc.RegisterMasterPOptions; import alluxio.grpc.Scope; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -307,13 +308,14 @@ public void start(Boolean isPrimary) throws IOException { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_LOST_MASTER_DETECTION, new LostMasterDetectionHeartbeatExecutor(), - () -> Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_LOG_CONFIG_REPORT_SCHEDULING, new LogConfigReportHeartbeatExecutor(), - () -> Configuration - .getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_DAILY_BACKUP_ENABLED)) { @@ -324,7 +326,8 @@ public void start(Boolean isPrimary) throws IOException { if (mJournalSpaceMonitor != null) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_JOURNAL_SPACE_MONITOR, 
mJournalSpaceMonitor, - () -> Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_JOURNAL_SPACE_MONITOR_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } if (mState.getClusterID().equals(INVALID_CLUSTER_ID)) { @@ -337,7 +340,8 @@ public void start(Boolean isPrimary) throws IOException { && !Configuration.getBoolean(PropertyKey.TEST_MODE)) { getExecutorService().submit(new HeartbeatThread(HeartbeatContext.MASTER_UPDATE_CHECK, new UpdateChecker(this), - () -> Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_UPDATE_CHECK_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } } else { @@ -352,7 +356,8 @@ public void start(Boolean isPrimary) throws IOException { .newBuilder(ClientContext.create(Configuration.global())).build()); getExecutorService().submit(new HeartbeatThread(HeartbeatContext.META_MASTER_SYNC, new MetaMasterSync(mMasterAddress, metaMasterClient), - () -> Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_STANDBY_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); LOG.info("Standby master with address {} starts sending heartbeat to leader master.", mMasterAddress); @@ -714,7 +719,7 @@ public LostMasterDetectionHeartbeatExecutor() { } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { long masterTimeoutMs = Configuration.getMs(PropertyKey.MASTER_HEARTBEAT_TIMEOUT); for (MasterInfo master : mMasters) { synchronized (master) { @@ -743,7 +748,7 @@ private final class LogConfigReportHeartbeatExecutor implements HeartbeatExecuto private volatile boolean mFirst = true; @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { // Skip the first heartbeat since it happens 
before servers have time to register their // configurations. if (mFirst) { diff --git a/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java b/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java index 8b74f695e6a9..d917be9e348f 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java +++ b/core/server/master/src/main/java/alluxio/master/meta/JournalSpaceMonitor.java @@ -169,7 +169,7 @@ public List getJournalDiskWarnings() { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { getJournalDiskWarnings().forEach(LOG::warn); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java index f793f2d7fa34..3b246cefae15 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java @@ -62,7 +62,7 @@ public MetaMasterSync(Address masterAddress, RetryHandlingMetaMasterMasterClient * Heartbeats to the leader master node. */ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { MetaCommand command = null; try { if (mMasterId.get() == UNINITIALIZED_MASTER_ID) { diff --git a/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java b/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java index d7d75f837014..7bfdfb6e77c2 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java +++ b/core/server/master/src/main/java/alluxio/master/meta/UpdateChecker.java @@ -45,7 +45,7 @@ public UpdateChecker(DefaultMetaMaster metaMaster) { * Heartbeat for the periodic update check. 
*/ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { List additionalInfo = new ArrayList<>(); int clusterSize = mMetaMaster.getWorkerAddresses().size(); diff --git a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java index bf65ad6d2449..3ccbb8c7aba1 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/DefaultMetricsMaster.java @@ -18,6 +18,7 @@ import alluxio.grpc.GrpcService; import alluxio.grpc.MetricValue; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -180,7 +181,8 @@ public void start(Boolean isLeader) throws IOException { if (isLeader) { getExecutorService().submit(new HeartbeatThread( HeartbeatContext.MASTER_CLUSTER_METRICS_UPDATER, new ClusterMetricsUpdater(), - () -> Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_CLUSTER_METRICS_UPDATE_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); } } @@ -215,7 +217,7 @@ public Map getMetrics() { */ private class ClusterMetricsUpdater implements HeartbeatExecutor { @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { updateMultiValueMasterMetrics(); } diff --git a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java index 70ee98d0b85c..ef5eee6f489c 100644 --- a/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java +++ 
b/core/server/master/src/main/java/alluxio/master/throttle/DefaultThrottleMaster.java @@ -19,6 +19,7 @@ import alluxio.conf.PropertyKey; import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -109,7 +110,8 @@ public void start(Boolean isLeader) throws IOException { LOG.info("Starting {}", getName()); mThrottleService = getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_THROTTLE, mThrottleExecutor, - () -> Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_THROTTLE_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); LOG.info("{} is started", getName()); @@ -141,7 +143,7 @@ public ThrottleExecutor(MasterProcess masterProcess) { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { mSystemMonitor.run(); } diff --git a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java index 40bab536216f..f49c504db33c 100644 --- a/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/replication/ReplicationCheckerTest.java @@ -281,7 +281,7 @@ private void heartbeatToAddLocationHelper(long blockId, long workerId) throws Ex @Test public void heartbeatWhenTreeIsEmpty() throws Exception { - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -292,17 +292,17 @@ public void heartbeatFileWithinRange() throws Exception { 
createBlockHelper(TEST_FILE_1, mFileContext, ""); // One replica, meeting replication min addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); // Two replicas, good heartbeatToAddLocationHelper(blockId, createWorkerHelper(1)); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); // Three replicas, meeting replication max, still good heartbeatToAddLocationHelper(blockId, createWorkerHelper(2)); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -311,7 +311,7 @@ public void heartbeatFileUnderReplicatedBy1() throws Exception { mFileContext.getOptions().setReplicationMin(1); long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -322,7 +322,7 @@ public void heartbeatFileNeedsMove() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, Constants.MEDIUM_SSD); addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map> expected = ImmutableMap.of(blockId, new Pair<>("host0", Constants.MEDIUM_SSD)); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); @@ -335,7 +335,7 @@ public void heartbeatFileDoesnotNeedMove() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, Constants.MEDIUM_MEM); addBlockLocationHelper(blockId, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, 
mMockReplicationHandler.getSetReplicaRequests()); Assert.assertEquals(EMPTY, mMockReplicationHandler.getMigrateRequests()); } @@ -345,7 +345,7 @@ public void heartbeatFileUnderReplicatedBy10() throws Exception { mFileContext.getOptions().setReplicationMin(10); long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 10); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -357,7 +357,7 @@ public void heartbeatMultipleFilesUnderReplicated() throws Exception { mFileContext.getOptions().setReplicationMin(2); long blockId2 = createBlockHelper(TEST_FILE_2, mFileContext, ""); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId1, 1, blockId2, 2); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -382,7 +382,7 @@ public void heartbeatFileUnderReplicatedAndLost() throws Exception { ImmutableMap.of(Constants.MEDIUM_MEM, 0L), ImmutableList.of(blockId), NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, NO_METRICS); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(EMPTY, mMockReplicationHandler.getSetReplicaRequests()); } @@ -392,7 +392,7 @@ public void heartbeatFileOverReplicatedBy1() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); addBlockLocationHelper(blockId, 2); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -403,7 +403,7 @@ public void heartbeatFileOverReplicatedBy10() throws Exception { long blockId = createBlockHelper(TEST_FILE_1, mFileContext, ""); addBlockLocationHelper(blockId, 11); - mReplicationChecker.heartbeat(); + 
mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId, 1); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -417,7 +417,7 @@ public void heartbeatMultipleFilesOverReplicated() throws Exception { addBlockLocationHelper(blockId1, 2); addBlockLocationHelper(blockId2, 4); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected = ImmutableMap.of(blockId1, 1, blockId2, 2); Assert.assertEquals(expected, mMockReplicationHandler.getSetReplicaRequests()); } @@ -431,7 +431,7 @@ public void heartbeatFilesUnderAndOverReplicated() throws Exception { addBlockLocationHelper(blockId1, 1); addBlockLocationHelper(blockId2, 5); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Map expected1 = ImmutableMap.of(blockId1, 2, blockId2, 3); Assert.assertEquals(expected1, mMockReplicationHandler.getSetReplicaRequests()); } @@ -449,7 +449,7 @@ public void heartbeatPartial() throws Exception { addBlockLocationHelper(blockId2, 1); addBlockLocationHelper(blockId3, 1); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); final Map replicateRequests = mMockReplicationHandler.getSetReplicaRequests(); System.out.println(replicateRequests); Assert.assertEquals(2, replicateRequests.size()); @@ -459,11 +459,11 @@ public void heartbeatPartial() throws Exception { mMockReplicationHandler.setJobStatus(1, Status.RUNNING); mMockReplicationHandler.setJobStatus(2, Status.RUNNING); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(0, replicateRequests.size()); mMockReplicationHandler.setJobStatus(1, Status.FAILED); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(1, replicateRequests.size()); Assert.assertEquals(3, replicateRequests.values().toArray()[0]); @@ -473,7 +473,7 @@ public void heartbeatPartial() throws Exception { 
mMockReplicationHandler.setJobStatus(2, Status.COMPLETED); mMockReplicationHandler.setJobStatus(3, Status.COMPLETED); - mReplicationChecker.heartbeat(); + mReplicationChecker.heartbeat(Long.MAX_VALUE); Assert.assertEquals(1, replicateRequests.size()); Assert.assertTrue(replicateRequests.containsKey(blockId3)); Assert.assertEquals(3, replicateRequests.values().toArray()[0]); diff --git a/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java b/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java index eb638ae88800..8054599ee0a6 100644 --- a/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java +++ b/core/server/master/src/test/java/alluxio/master/meta/JournalSpaceMonitorTest.java @@ -82,7 +82,7 @@ public void testLoggingPositive() throws IOException, InterruptedException { JournalSpaceMonitor monitor = Mockito.spy( new JournalSpaceMonitor(Paths.get(".").toAbsolutePath().toString(), 90)); doReturn(new CommandReturn(0, CMD_RETURN_MOCK)).when(monitor).getRawDiskInfo(); - monitor.heartbeat(); + monitor.heartbeat(Long.MAX_VALUE); assertTrue(mLogger.wasLoggedWithLevel("The journal disk /dev/nvme0n1p2 backing the journal " + "has only .* space left", Level.WARN)); } @@ -92,7 +92,7 @@ public void testLoggingNegative() throws IOException, InterruptedException { JournalSpaceMonitor monitor = Mockito.spy( new JournalSpaceMonitor(Paths.get(".").toAbsolutePath().toString(), 10)); doReturn(new CommandReturn(0, CMD_RETURN_MOCK)).when(monitor).getRawDiskInfo(); - monitor.heartbeat(); + monitor.heartbeat(Long.MAX_VALUE); assertFalse(mLogger.wasLoggedWithLevel("The journal disk /dev/nvme0n1p2 backing the journal " + "has only .* space left", Level.WARN)); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java index b372c7f84ac8..3ac632238cc2 100644 --- 
a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java @@ -117,7 +117,7 @@ private void registerWithMaster() throws IOException { * Heartbeats to the master node about the change in the worker's managed space. */ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { boolean success = mBlockMasterSyncHelper.heartbeat( mWorkerId.get(), mBlockWorker.getReport(), mBlockWorker.getStoreMeta(), this::handleMasterCommand); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java index 6abc313fc1d1..ba9758da143a 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockSyncMasterGroup.java @@ -15,6 +15,7 @@ import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.master.MasterClientContext; @@ -91,7 +92,8 @@ public synchronized void start(ExecutorService executorService) { } mMasterSyncOperators.values().forEach(blockMasterSync -> executorService .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, - () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global()))); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 0dd50a6978c5..fcba47cedbe6 100644 --- 
a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -38,6 +38,7 @@ import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; import alluxio.grpc.UfsReadOptions; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -223,7 +224,8 @@ public void start(WorkerNetAddress address) throws IOException { new PinListSync(this, mFileSystemMasterClient)); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_PIN_LIST_SYNC, pinListSync, - () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global())); // Setup session cleaner @@ -236,7 +238,8 @@ public void start(WorkerNetAddress address) throws IOException { StorageChecker storageChecker = mResourceCloser.register(new StorageChecker()); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_STORAGE_HEALTH, storageChecker, - () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global())); } @@ -251,7 +254,8 @@ protected void setupBlockMasterSync() throws IOException { .register(new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClientPool)); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, blockMasterSync, - () -> Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)), Configuration.global(), ServerUserState.global())); } @@ -568,7 +572,7 @@ private 
Metrics() {} // prevent instantiation public final class StorageChecker implements HeartbeatExecutor { @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { mBlockStore.removeInaccessibleStorage(); } catch (Exception e) { diff --git a/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java b/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java index a85a50092a3c..67ac89a7357d 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/PinListSync.java @@ -47,7 +47,7 @@ public PinListSync(BlockWorker blockWorker, FileSystemMasterClient masterClient) } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { // Send the sync try { Set pinList = mMasterClient.getPinList(); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java index 3c9aeea0b491..660e0735c785 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/SpecificMasterBlockSync.java @@ -182,7 +182,7 @@ private RetryPolicy createEndlessRetry() { } @Override - public synchronized void heartbeat() throws InterruptedException { + public synchronized void heartbeat(long runLimit) throws InterruptedException { if (mWorkerState == WorkerMasterRegistrationState.NOT_REGISTERED) { // Not registered because: // 1. The worker just started, we kick off the 1st registration here. 
diff --git a/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java b/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java index 2e8b44920ef6..dae0717ffef1 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/PinListSyncTest.java @@ -44,7 +44,7 @@ public Set getPinList() { }; PinListSync sync = new PinListSync(mBlockWorker, client); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); // should receive the latest pin list assertEquals(testPinLists, mBlockWorker.getPinList()); @@ -62,7 +62,7 @@ public Set getPinList() throws IOException { PinListSync sync = new PinListSync(mBlockWorker, client); // should fail - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); // should not get any pin list update assertEquals(ImmutableSet.of(), mBlockWorker.getPinList()); diff --git a/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java index cf02f215f52a..e88385f2ae56 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/SpecificMasterBlockSyncTest.java @@ -63,24 +63,24 @@ public void heartbeatThread() throws Exception { assertFalse(sync.isRegistered()); // heartbeat registers the worker if it has not been registered. - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); assertTrue(sync.isRegistered()); // heartbeat returning register command resets the worker state. 
Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, true); TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); assertFalse(sync.isRegistered()); Configuration.set(PropertyKey.WORKER_REGISTER_STREAM_ENABLED, false); TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(true); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); TestBlockMasterClient.INSTANCE.setReturnRegisterCommand(false); assertFalse(sync.isRegistered()); // heartbeat registers the worker if it has not been registered. - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); assertTrue(sync.isRegistered()); // TestBlockHeartbeatReporter generates the report with one more removed block id each time. @@ -88,7 +88,7 @@ public void heartbeatThread() throws Exception { // heartbeatReportCapacityThreshold is 3. TestBlockMasterClient.INSTANCE.mHeartbeatCallCount = 0; TestBlockMasterClient.INSTANCE.setHeartbeatError(true); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); assertFalse(sync.isRegistered()); assertEquals( heartbeatReportCapacityThreshold, TestBlockMasterClient.INSTANCE.mHeartbeatCallCount); @@ -96,7 +96,7 @@ public void heartbeatThread() throws Exception { // registration should happen on the next heartbeat and the reporter should be cleared, // except the newly generated ones. 
TestBlockMasterClient.INSTANCE.setHeartbeatError(false); - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); assertTrue(sync.isRegistered()); assertEquals(1, blockHeartbeatReporter.generateReportAndClear().getBlockChangeCount()); diff --git a/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java index 8ec0aa9048c8..ab6bdb529444 100644 --- a/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java +++ b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuse.java @@ -27,6 +27,7 @@ import alluxio.exception.runtime.InvalidArgumentRuntimeException; import alluxio.fuse.meta.UpdateChecker; import alluxio.fuse.options.FuseOptions; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.jnifuse.LibFuse; @@ -178,7 +179,7 @@ public static void main(String[] args) throws ParseException { if (fuseOptions.updateCheckEnabled()) { executor = Executors.newSingleThreadExecutor(); executor.submit(new HeartbeatThread(HeartbeatContext.FUSE_UPDATE_CHECK, - UpdateChecker.create(fuseOptions), () -> Long.valueOf(Constants.DAY_MS), + UpdateChecker.create(fuseOptions), () -> new FixedIntervalSupplier(Constants.DAY_MS), Configuration.global(), UserState.Factory.create(conf))); } try (FileSystem fs = FileSystem.Factory.create(fsContext, fuseOptions.getFileSystemOptions())) { diff --git a/integration/fuse/src/main/java/alluxio/fuse/meta/UpdateChecker.java b/integration/fuse/src/main/java/alluxio/fuse/meta/UpdateChecker.java index 802ebd1ef434..bfcc6ca93f13 100644 --- a/integration/fuse/src/main/java/alluxio/fuse/meta/UpdateChecker.java +++ b/integration/fuse/src/main/java/alluxio/fuse/meta/UpdateChecker.java @@ -79,7 +79,7 @@ private UpdateChecker(List unchangeableFuseInfo, Map fuseO * Heartbeat for the periodic update check. 
*/ @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { try { String latestVersion = UpdateCheck.getLatestVersion(mInstanceId, getFuseCheckInfo(), diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index ae99321ca928..bc2782e01bfb 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -28,6 +28,7 @@ import alluxio.grpc.ListAllPOptions; import alluxio.grpc.RegisterCommand; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -199,7 +200,8 @@ public void start(Boolean isLeader) throws IOException { getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.JOB_MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), - () -> Configuration.getMs(PropertyKey.JOB_MASTER_LOST_WORKER_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.JOB_MASTER_LOST_WORKER_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("JOB_MASTER_AUDIT_LOG"); @@ -694,7 +696,7 @@ private final class LostWorkerDetectionHeartbeatExecutor implements HeartbeatExe public LostWorkerDetectionHeartbeatExecutor() {} @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { int masterWorkerTimeoutMs = (int) Configuration .getMs(PropertyKey.JOB_MASTER_WORKER_TIMEOUT); List lostWorkers = new ArrayList<>(); diff --git a/job/server/src/main/java/alluxio/worker/JobWorker.java b/job/server/src/main/java/alluxio/worker/JobWorker.java index aec996509b95..29a6cc054772 100644 --- a/job/server/src/main/java/alluxio/worker/JobWorker.java +++ 
b/job/server/src/main/java/alluxio/worker/JobWorker.java @@ -21,6 +21,7 @@ import alluxio.exception.ConnectionFailedException; import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatThread; import alluxio.job.JobServerContext; @@ -107,7 +108,8 @@ public void start(WorkerNetAddress address) throws IOException { new HeartbeatThread(HeartbeatContext.JOB_WORKER_COMMAND_HANDLING, new CommandHandlingExecutor(mJobServerContext, taskExecutorManager, mJobMasterClient, address), - () -> Configuration.getMs(PropertyKey.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL)), Configuration.global(), ServerUserState.global())); } diff --git a/job/server/src/main/java/alluxio/worker/job/command/CommandHandlingExecutor.java b/job/server/src/main/java/alluxio/worker/job/command/CommandHandlingExecutor.java index c52db6c0ff58..4d14e2418532 100644 --- a/job/server/src/main/java/alluxio/worker/job/command/CommandHandlingExecutor.java +++ b/job/server/src/main/java/alluxio/worker/job/command/CommandHandlingExecutor.java @@ -83,7 +83,7 @@ public CommandHandlingExecutor(JobServerContext jobServerContext, } @Override - public void heartbeat() { + public void heartbeat(long timeLimitMs) { JobWorkerHealthReporter.JobWorkerHealthReport jobWorkerHealthReport = mHealthReporter.getJobWorkerHealthReport(); diff --git a/job/server/src/test/java/alluxio/job/command/CommandHandlingExecutorTest.java b/job/server/src/test/java/alluxio/job/command/CommandHandlingExecutorTest.java index 95310ff7c92b..15c2d804e916 100644 --- a/job/server/src/test/java/alluxio/job/command/CommandHandlingExecutorTest.java +++ b/job/server/src/test/java/alluxio/job/command/CommandHandlingExecutorTest.java @@ -86,7 +86,7 @@ public void heartbeat() throws Exception { 
Mockito.when(mJobMasterClient.heartbeat(any(JobWorkerHealth.class), eq(Lists.newArrayList()))) .thenReturn(Lists.newArrayList(command.build())); - mCommandHandlingExecutor.heartbeat(); + mCommandHandlingExecutor.heartbeat(Long.MAX_VALUE); ExecutorService executorService = AlluxioMockUtil.getInternalState( mCommandHandlingExecutor, "mCommandHandlingService"); executorService.shutdown(); diff --git a/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java b/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java index e5a24c5715be..ba7b9bab3a65 100644 --- a/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java +++ b/table/server/master/src/main/java/alluxio/master/table/transform/TransformManager.java @@ -18,6 +18,7 @@ import alluxio.exception.ExceptionMessage; import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; +import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; import alluxio.heartbeat.HeartbeatExecutor; import alluxio.heartbeat.HeartbeatThread; @@ -135,7 +136,8 @@ public TransformManager( public void start(ExecutorService executorService, UserState userState) { executorService.submit( new HeartbeatThread(HeartbeatContext.MASTER_TABLE_TRANSFORMATION_MONITOR, new JobMonitor(), - () -> Configuration.getMs(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.TABLE_TRANSFORM_MANAGER_JOB_MONITOR_INTERVAL)), Configuration.global(), userState)); } @@ -300,7 +302,7 @@ private void handleJobSuccess(TransformJobInfo job) { } @Override - public void heartbeat() throws InterruptedException { + public void heartbeat(long timeLimitMs) throws InterruptedException { for (TransformJobInfo job : mState.getRunningJobs()) { if (Thread.currentThread().isInterrupted()) { throw new InterruptedException("TransformManager's 
heartbeat was interrupted"); diff --git a/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java index d9aa14700b79..ef11c38eb6fe 100644 --- a/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/BlockMasterDeleteLostWorkerIntegrationTest.java @@ -91,14 +91,14 @@ public void lostWorkerDeletedAfterTimeout() throws Exception { // The worker will not be deleted, if the lost time is less than MASTER_WORKER_TIMEOUT_MS long newTimeMs = worker.getLastUpdatedTimeMs() + MASTER_WORKER_TIMEOUT_MS + 1; mClock.setTimeMs(newTimeMs); - lostWorkerDetector.heartbeat(); + lostWorkerDetector.heartbeat(Long.MAX_VALUE); assertEquals(0, mBlockMaster.getWorkerCount()); assertEquals(1, mBlockMaster.getLostWorkerCount()); // The worker will be deleted, if the lost time is greater than MASTER_WORKER_TIMEOUT_MS newTimeMs = newTimeMs + MASTER_WORKER_DELETE_TIMEOUT_MS + 1; mClock.setTimeMs(newTimeMs); - lostWorkerDetector.heartbeat(); + lostWorkerDetector.heartbeat(Long.MAX_VALUE); assertEquals(0, mBlockMaster.getWorkerCount()); assertEquals(0, mBlockMaster.getLostWorkerCount()); } diff --git a/tests/src/test/java/alluxio/client/fs/FileSystemContextReinitIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/FileSystemContextReinitIntegrationTest.java index 22472643ab13..43c92688ec24 100644 --- a/tests/src/test/java/alluxio/client/fs/FileSystemContextReinitIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/fs/FileSystemContextReinitIntegrationTest.java @@ -139,7 +139,7 @@ public void configHashSyncWithOpenStream() throws Exception { ExecutorService service = Executors.newSingleThreadExecutor(); Future future = service.submit(() -> { - mExecutor.heartbeat(); + mExecutor.heartbeat(Long.MAX_VALUE); }); TimeUnit.SECONDS.sleep(1); // Stream is open, so reinitialization should block until 
the stream is closed. @@ -159,7 +159,7 @@ public void configHashSyncWithOpenStream() throws Exception { * Triggers ConfigHashSync heartbeat and waits for it to finish. */ private void triggerAndWaitSync() throws Exception { - mExecutor.heartbeat(); + mExecutor.heartbeat(Long.MAX_VALUE); } private void restartMasters() throws Exception { diff --git a/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java index dab5f3e302ab..6bd74f113f03 100644 --- a/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/block/BlockMasterRegisterStreamIntegrationTest.java @@ -211,7 +211,7 @@ public void registerLostWorker() throws Exception { mClock.setTimeMs(newTimeMs); DefaultBlockMaster.LostWorkerDetectionHeartbeatExecutor lostWorkerDetector = ((DefaultBlockMaster) mBlockMaster).new LostWorkerDetectionHeartbeatExecutor(); - lostWorkerDetector.heartbeat(); + lostWorkerDetector.heartbeat(Long.MAX_VALUE); // Verify the worker has been forgotten assertEquals(0, mBlockMaster.getWorkerCount()); diff --git a/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java b/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java index 6964bdb2e146..d90d6d56741d 100644 --- a/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java +++ b/tests/src/test/java/alluxio/server/block/BlockWorkerRegisterStreamIntegrationTest.java @@ -462,7 +462,7 @@ public void deleteDuringRegisterStream() throws Exception { f.get(); assertNull(error.get()); // Validation will happen on the heartbeat - sync.heartbeat(); + sync.heartbeat(Long.MAX_VALUE); } // TODO(jiacheng): an internal block movement happens during register stream From e4499aeadad24f6e548eda017d18aeb33c7d9de7 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: 
Fri, 14 Apr 2023 09:58:01 +0800 Subject: [PATCH 232/334] Show the invalid default value and key name ### What changes are proposed in this pull request? If we develop a new Propertykey and give the inappropriate default value, the master will not start successfully, and we cannot find which Propertykey is bad. So I improve the prompt and show the related Propertykey ### Why are the changes needed? Show the related Propertykey of inappropriate default value ### Does this PR introduce any user facing changes? NO pr-link: Alluxio/alluxio#17185 change-id: cid-486e3ab1c1092299d0b8e762fe43715b9dcbd8e7 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index f05f12f37699..8e77b4ea7a70 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -412,7 +412,8 @@ public Builder setDefaultSupplier(Supplier supplier, String description) * @return the updated builder instance */ public Builder setDefaultValue(Object defaultValue) { - checkArgument(validateValue(defaultValue, mType, mEnumType, mValueValidationFunction)); + checkArgument(validateValue(defaultValue, mType, mEnumType, mValueValidationFunction), + String.format("default value %s of %s validate failed", defaultValue, mName)); mDefaultValue = formatValue(defaultValue, mType, mEnumType, mDelimiter); return this; } From f7e811c384e8049eb32a66721834cacf03f1dd30 Mon Sep 17 00:00:00 2001 From: secfree Date: Mon, 17 Apr 2023 10:43:57 +0800 Subject: [PATCH 233/334] Improve the performance of MountTable.getMountPoint ### What changes are proposed in this pull request? Improve the performance of MountTable.getMountPoint ### Why are the changes needed? Currently the implementation of `MountTable.getMountPoint` needs to iterate through all mount points. 
In one of our Alluxio clusters, there are more than 300 mount points. The leader alluxio master had very high load and we found `MountTable.getMountPoint` cost most of the cpu time. This PR can improve the performance a lot especially for clusters with lots of mount points. Below is the time cost of calling `MountTable.getMountPoint` 1000 times when there are 300 mount points | version | time cost (ms) | | --- | --- | | master | 142 | | PR | 6 | ### Does this PR introduce any user facing changes? NO pr-link: Alluxio/alluxio#17244 change-id: cid-aa35fe2bb9c2f439fc818461cb49c15c9391909e --- .../main/java/alluxio/util/io/PathUtils.java | 24 +++++++++++++++ .../java/alluxio/util/io/PathUtilsTest.java | 30 +++++++++++++++++++ .../alluxio/master/file/meta/MountTable.java | 12 ++++---- 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/core/common/src/main/java/alluxio/util/io/PathUtils.java b/core/common/src/main/java/alluxio/util/io/PathUtils.java index cb731a9d7270..0d3069b69182 100644 --- a/core/common/src/main/java/alluxio/util/io/PathUtils.java +++ b/core/common/src/main/java/alluxio/util/io/PathUtils.java @@ -22,8 +22,10 @@ import com.google.common.base.Preconditions; import org.apache.commons.io.FilenameUtils; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.UUID; import java.util.regex.Pattern; import javax.annotation.concurrent.ThreadSafe; @@ -444,4 +446,26 @@ public static String normalizePath(String path, String separator) { } private PathUtils() {} // prevent instantiation + + /** + * Returns the list of possible mount points of the given path. 
+ * + * "/a/b/c" => {"/a", "/a/b", "/a/b/c"} + * + * @param path the path to get the mount points of + * @return a list of paths + */ + public static List getPossibleMountPoints(String path) throws InvalidPathException { + String basePath = cleanPath(path); + List paths = new ArrayList<>(); + if ((basePath != null) && !basePath.equals(AlluxioURI.SEPARATOR)) { + paths.add(basePath); + String parent = getParent(path); + while (!parent.equals(AlluxioURI.SEPARATOR)) { + paths.add(0, parent); + parent = getParent(parent); + } + } + return paths; + } } diff --git a/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java b/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java index cd34c8a078b9..81d7e1ac01da 100644 --- a/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java +++ b/core/common/src/test/java/alluxio/util/io/PathUtilsTest.java @@ -507,4 +507,34 @@ public void normalizePath() throws Exception { assertEquals("/foo/bar//", PathUtils.normalizePath("/foo/bar//", "/")); assertEquals("/foo/bar%", PathUtils.normalizePath("/foo/bar", "%")); } + + /** + * Tests the {@link PathUtils#getPossibleMountPoints(String)} method to + * throw an exception in case the path is invalid. + */ + @Test + public void getPossibleMountPointsException() throws InvalidPathException { + mException.expect(InvalidPathException.class); + PathUtils.getPossibleMountPoints(""); + } + + /** + * Tests the {@link PathUtils#getPossibleMountPoints(String)} method. 
+ */ + @Test + public void getPossibleMountPointsNoException() throws InvalidPathException { + ArrayList paths = new ArrayList<>(); + assertEquals(paths, PathUtils.getPossibleMountPoints("/")); + assertEquals(paths, PathUtils.getPossibleMountPoints("//")); + + paths.add("/a"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/")); + paths.add("/a/b"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/")); + paths.add("/a/b/c"); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/c")); + assertEquals(paths, PathUtils.getPossibleMountPoints("/a/b/c/")); + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java index 563a282b9f37..da0ae9c4d871 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java @@ -323,13 +323,13 @@ public void update(Supplier journalContext, AlluxioURI alluxioUr public String getMountPoint(AlluxioURI uri) throws InvalidPathException { String path = uri.getPath(); String lastMount = ROOT; + List possibleMounts = PathUtils.getPossibleMountPoints(path); try (LockResource r = new LockResource(mReadLock)) { - for (Map.Entry entry : mState.getMountTable().entrySet()) { - String mount = entry.getKey(); - // we choose a new candidate path if the previous candidate path is a prefix - // of the current alluxioPath and the alluxioPath is a prefix of the path - if (!mount.equals(ROOT) && PathUtils.hasPrefix(path, mount) - && lastMount.length() < mount.length()) { + Map mountTable = mState.getMountTable(); + for (String mount: possibleMounts) { + if (mountTable.containsKey(mount)) { + // results in `possibleMounts` are from shortest to longest, so it will get the + // longest matching 
below lastMount = mount; } } From 46270b8ac3af3643a0cafe259e209e3788ee7db5 Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:46:21 +0800 Subject: [PATCH 234/334] Enable bucket cache in v2 s3 proxy ### What changes are proposed in this pull request? Create 'BUCKET_CACHE' in v2 s3 proxy to reduce time cost of checking bucket path in the same way as PR #16806. ### Why are the changes needed? To keep the consistence between v2 s3 proxy and v1 s3 proxy. To speed up the Alluxio proxy efficency to deal with requests . ### Does this PR introduce any user facing changes? If enabling the cache, alluxio will cache bucket path statistics for specified time period(configured in alluxio-site.properties file). Be careful to use this cache because Alluxio S3 API will behave differently from AWS S3 API when coming an illegal request. This bucket path cache is swithed off by default. pr-link: Alluxio/alluxio#17022 change-id: cid-a6ae6484aab704bc39ce3a02d22d70bd633cc6a6 --- .../main/java/alluxio/conf/PropertyKey.java | 6 ++- .../java/alluxio/proxy/s3/S3BucketTask.java | 26 +++++++++---- .../main/java/alluxio/proxy/s3/S3Handler.java | 14 +++++++ .../java/alluxio/proxy/s3/S3ObjectTask.java | 37 +++++++++++++------ .../proxy/s3/S3RestServiceHandler.java | 12 ++++-- .../java/alluxio/proxy/s3/S3RestUtils.java | 7 ++-- 6 files changed, 74 insertions(+), 28 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 8e77b4ea7a70..07874168519f 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5439,9 +5439,11 @@ public String toString() { public static final PropertyKey PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS = durationBuilder(Name.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS) .setAlias("alluxio.proxy.s3.bucketpathcache.timeout.ms") - .setDefaultValue("1min") + 
.setDefaultValue("0min") .setDescription("Expire bucket path statistics in cache for this time period. " - + "Set 0min to disable the cache.") + + "Set 0min to disable the cache. If enabling the cache, " + + "be careful that Alluxio S3 API will behave differently from AWS S3 API" + + " if bucket path cache entries become stale.") .setConsistencyCheckLevel(ConsistencyCheckLevel.IGNORE) .setScope(Scope.NONE) .build(); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java index 9e13e22bcffc..17d3b5c7eade 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3BucketTask.java @@ -144,6 +144,8 @@ public Response continueTask() { // debatable (?) potentially breaks backcompat(?) .filter(URIStatus::isFolder) .collect(Collectors.toList()); + buckets.forEach( + (uri) -> mHandler.BUCKET_PATH_CACHE.put(uri.getPath(), true)); return new ListAllMyBucketsResult(buckets); } }); @@ -165,7 +167,8 @@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); AlluxioURI uri = new AlluxioURI(path); try { TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); @@ -196,7 +199,8 @@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); try { List children = mHandler.getMetaFS().listStatus(new AlluxioURI( S3RestUtils.MULTIPART_UPLOADS_METADATA_DIR)); @@ -256,7 +260,8 
@@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, path, auditContext, + mHandler.BUCKET_PATH_CACHE); String markerParam = mHandler.getQueryParameter("marker"); String maxKeysParam = mHandler.getQueryParameter("max-keys"); String prefixParam = mHandler.getQueryParameter("prefix"); @@ -330,7 +335,8 @@ public Response continueTask() { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), mHandler.getUser(), mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(mHandler.getMetaFS(), bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(mHandler.getMetaFS(), bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); try { TaggingData tagData = new XmlMapper().readerFor(TaggingData.class) .readValue(mHandler.getInputStream()); @@ -395,6 +401,7 @@ public Response continueTask() { // Silently swallow CreateBucket calls on existing buckets for this user // - S3 clients may prepend PutObject requests with CreateBucket calls instead of // calling HeadBucket to ensure that the bucket exists + mHandler.BUCKET_PATH_CACHE.put(bucketPath, true); return Response.Status.OK; } // Otherwise, this bucket is owned by a different user @@ -428,6 +435,7 @@ public Response continueTask() { } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } + mHandler.BUCKET_PATH_CACHE.put(bucketPath, true); return Response.Status.OK; } }); @@ -509,7 +517,8 @@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + 
S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); } return Response.ok().build(); }); @@ -530,7 +539,8 @@ public Response continueTask() { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); LOG.debug("DeleteBucketTagging bucket={}", bucketPath); Map xattrMap = new HashMap<>(); @@ -565,7 +575,8 @@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), null)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); // Delete the bucket. 
DeletePOptions options = DeletePOptions.newBuilder().setAlluxioOnly(Configuration .get(PropertyKey.PROXY_S3_DELETE_TYPE) @@ -573,6 +584,7 @@ public Response continueTask() { .build(); try { userFs.delete(new AlluxioURI(bucketPath), options); + mHandler.BUCKET_PATH_CACHE.put(bucketPath, false); } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java index 46eb1e82226f..179447ede09e 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java @@ -25,6 +25,8 @@ import alluxio.web.ProxyWebServer; import com.google.common.base.Stopwatch; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import org.eclipse.jetty.server.Request; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +42,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nullable; @@ -72,6 +75,17 @@ public class S3Handler { Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*$"); public static final Pattern OBJECT_PATH_PATTERN = Pattern.compile("^" + S3RequestServlet.S3_V2_SERVICE_PATH_PREFIX + "/[^/]*/.*$"); + public static final int BUCKET_PATH_CACHE_SIZE = 65536; + /* BUCKET_PATH_CACHE caches bucket path during specific period. + BUCKET_PATH_CACHE.put(bucketPath,true) means bucket path exists. + BUCKET_PATH_CACHE.put(bucketPath,false) plays the same effect + as BUCKET_PATH_CACHE.remove(bucketPath). 
*/ + public static final Cache BUCKET_PATH_CACHE = CacheBuilder.newBuilder() + .maximumSize(BUCKET_PATH_CACHE_SIZE) + .expireAfterWrite( + Configuration.global().getMs(PropertyKey.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS), + TimeUnit.MILLISECONDS) + .build(); private static final Logger LOG = LoggerFactory.getLogger(S3Handler.class); private static final ThreadLocal TLS_BYTES = ThreadLocal.withInitial(() -> new byte[8 * 1024]); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 27d77684b02e..78450c296eb6 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -171,7 +171,8 @@ public Response continueTask() { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); AlluxioURI tmpDir = new AlluxioURI(S3RestUtils.getMultipartTemporaryDirForObject( bucketPath, mHandler.getObject(), uploadId)); @@ -222,7 +223,8 @@ public Response continueTask() { AlluxioURI uri = new AlluxioURI(objectPath); try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); try { TaggingData tagData = S3RestUtils.deserializeTags(userFs.getStatus(uri).getXAttr()); LOG.debug("GetObjectTagging tagData={}", tagData); @@ -249,7 +251,8 @@ public Response continueTask() { String bucketPath = 
S3RestUtils.parsePath(AlluxioURI.SEPARATOR + mHandler.getBucket()); try (S3AuditContext auditContext = mHandler.createAuditContext( mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); String objectPath = bucketPath + AlluxioURI.SEPARATOR + mHandler.getObject(); AlluxioURI objectUri = new AlluxioURI(objectPath); TaggingData tagData = null; @@ -448,6 +451,7 @@ public Response continueTask() { if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { createDirectory(objectPath, userFs, auditContext); } + AlluxioURI objectUri = new AlluxioURI(objectPath); // Populate the xattr Map with the metadata tags if provided Map xattrMap = new HashMap<>(); @@ -473,7 +477,8 @@ public Response continueTask() { .build()) .setWriteType(S3RestUtils.getS3WriteType()) .setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) - .setOverwrite(true); + .setOverwrite(true) + .setCheckS3BucketPath(true); // Handle metadata directive final String metadataDirective = mHandler.getHeader( @@ -624,6 +629,7 @@ public Response createDirectory(String objectPath, FileSystem userFs, .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) .setAllowExists(true) + .setCheckS3BucketPath(true) .build(); userFs.createDirectory(new AlluxioURI(objectPath), dirOptions); } catch (FileAlreadyExistsException e) { @@ -696,7 +702,8 @@ public Response continueTask() { try (S3AuditContext auditContext = mHandler.createAuditContext(mOPType.name(), user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); String objectPath = bucketPath + AlluxioURI.SEPARATOR + object; if (objectPath.endsWith(AlluxioURI.SEPARATOR)) { @@ -722,6 +729,7 @@ public Response continueTask() { 
.setWriteType(S3RestUtils.getS3WriteType()) .putAllXattr(xattrMap).setXattrPropStrat(XAttrPropagationStrategy.LEAF_NODE) .setOverwrite(true) + .setCheckS3BucketPath(true) .build(); return createObject(objectPath, userFs, filePOptions, auditContext); } @@ -839,7 +847,8 @@ public Response continueTask() { final String contentTypeHeader = mHandler.getHeader(S3Constants.S3_CONTENT_TYPE_HEADER); try (S3AuditContext auditContext = mHandler.createAuditContext( "initiateMultipartUpload", user, bucket, object)) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); if (taggingHeader != null) { // Parse the tagging header if it exists try { tagData = S3RestUtils.deserializeTaggingHeader( @@ -877,7 +886,9 @@ public Response continueTask() { .setOwnerBits(Bits.ALL) .setGroupBits(Bits.ALL) .setOtherBits(Bits.NONE).build()) - .setWriteType(S3RestUtils.getS3WriteType()).build()); + .setWriteType(S3RestUtils.getS3WriteType()) + .setCheckS3BucketPath(true) + .build()); // Create the Alluxio multipart upload metadata file if (contentTypeHeader != null) { @@ -1035,7 +1046,8 @@ public Response continueTask() { mUserFs = S3RestUtils.createFileSystemForUser(user, mHandler.getMetaFS()); try { String bucketPath = S3RestUtils.parsePath(AlluxioURI.SEPARATOR + bucket); - S3RestUtils.checkPathIsAlluxioDirectory(mUserFs, bucketPath, null); + S3RestUtils.checkPathIsAlluxioDirectory(mUserFs, bucketPath, null, + mHandler.BUCKET_PATH_CACHE); objectPath = bucketPath + AlluxioURI.SEPARATOR + object; // Check for existing multipart info files and dirs AlluxioURI multipartTemporaryDir = new AlluxioURI( @@ -1304,7 +1316,8 @@ public Response continueTask() { .getMultipartTemporaryDirForObject(bucketPath, mHandler.getObject(), uploadId)); try (S3AuditContext auditContext = mHandler.createAuditContext( "abortMultipartUpload", user, mHandler.getBucket(), mHandler.getObject())) 
{ - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); try { S3RestUtils.checkStatusesForUploadId(mHandler.getMetaFS(), userFs, multipartTemporaryDir, uploadId); @@ -1360,7 +1373,8 @@ public Response continueTask() { .build(); try (S3AuditContext auditContext = mHandler.createAuditContext( "deleteObjectTags", user, mHandler.getBucket(), mHandler.getObject())) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); try { userFs.setAttribute(new AlluxioURI(objectPath), attrPOptions); } catch (Exception e) { @@ -1396,7 +1410,8 @@ public Response continueTask() { .build(); try (S3AuditContext auditContext = mHandler.createAuditContext( "deleteObject", user, mHandler.getBucket(), mHandler.getObject())) { - S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext); + S3RestUtils.checkPathIsAlluxioDirectory(userFs, bucketPath, auditContext, + mHandler.BUCKET_PATH_CACHE); try { userFs.delete(new AlluxioURI(objectPath), options); } catch (FileDoesNotExistException | DirectoryNotEmptyException e) { diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 967528c4c01c..3a4a28d65355 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -111,7 +111,11 @@ public final class S3RestServiceHandler { /* Object is after bucket in the URL path */ public static final String OBJECT_PARAM = "{bucket}/{object:.+}"; public static final int BUCKET_PATH_CACHE_SIZE = 65536; - private static final Cache BUCKET_PATH_CACHE = CacheBuilder.newBuilder() + /* BUCKET_PATH_CACHE caches bucket path 
during specific period. + BUCKET_PATH_CACHE.put(bucketPath,true) means bucket path exists. + BUCKET_PATH_CACHE.put(bucketPath,false) plays the same effect + as BUCKET_PATH_CACHE.remove(bucketPath). */ + private static final Cache BUCKET_PATH_CACHE = CacheBuilder.newBuilder() .maximumSize(BUCKET_PATH_CACHE_SIZE) .expireAfterWrite( Configuration.global().getMs(PropertyKey.PROXY_S3_BUCKETPATHCACHE_TIMEOUT_MS), @@ -225,7 +229,7 @@ public Response listAllMyBuckets() { // debatable (?) potentially breaks backcompat(?) .filter(URIStatus::isFolder) .collect(Collectors.toList()); - buckets.forEach((uri) -> BUCKET_PATH_CACHE.put(new AlluxioURI(uri.getPath()), true)); + buckets.forEach((uri) -> BUCKET_PATH_CACHE.put(uri.getPath(), true)); return new ListAllMyBucketsResult(buckets); } }); @@ -588,7 +592,7 @@ public Response createBucket(@PathParam("bucket") final String bucket, } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } - BUCKET_PATH_CACHE.put(new AlluxioURI(bucketPath), true); + BUCKET_PATH_CACHE.put(bucketPath, true); return Response.Status.OK; } }); @@ -649,7 +653,7 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, } catch (Exception e) { throw S3RestUtils.toBucketS3Exception(e, bucketPath, auditContext); } - BUCKET_PATH_CACHE.put(new AlluxioURI(bucketPath), false); + BUCKET_PATH_CACHE.put(bucketPath, false); return Response.Status.NO_CONTENT; } }); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index d10f9beb1aee..5013db2a0170 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -317,14 +317,13 @@ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, */ public static void checkPathIsAlluxioDirectory(FileSystem fs, String bucketPath, @Nullable S3AuditContext auditContext, - 
Cache bucketPathCache) + Cache bucketPathCache) throws S3Exception { - AlluxioURI uri = new AlluxioURI(bucketPath); - if (Boolean.TRUE.equals(bucketPathCache.getIfPresent(uri))) { + if (Boolean.TRUE.equals(bucketPathCache.getIfPresent(bucketPath))) { return; } checkPathIsAlluxioDirectory(fs, bucketPath, auditContext); - bucketPathCache.put(uri, true); + bucketPathCache.put(bucketPath, true); } /** From 9f152c554b737b7014a3c8ac1555d0a2d1e2e936 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Wed, 19 Apr 2023 10:17:12 +0800 Subject: [PATCH 235/334] Fix Rocksdb thread safety using refcount(no lock) ### What changes are proposed in this pull request? This change intends to fix RocksDB segfaults caused by race conditions when we close the RocksDB concurrent to ongoing operations like: 1. Iterations represented by BlockMaster.validateBlocks() 2. Backup/checkpoint where we iterate the RocksDB contents and dump into an output stream 3. RPC serving in RocksInodeStore The above race conditions will cause the Alluxio master to segfault and crash with an hs_err.pid file like ``` Current thread (0x00007ec746db8f70): JavaThread "Master Block Integrity Check" daemon [_thread_in_native, id=32073, stack(0x00007ebf18fd0000,0x00007ebf190d1000)] Stack: [0x00007ebf18fd0000,0x00007ebf190d1000], sp=0x00007ebf190cf460, free space=1021k Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted, Vv=VM code, C=native code) C [librocksdbjni249315869067059420.so+0x5e1c4b] rocksdb::MergingIterator::Next()+0x24b C [librocksdbjni249315869067059420.so+0x5ddb13] rocksdb::MergingIterator::NextAndGetResult(rocksdb::IterateResult*)+0x13 C [librocksdbjni249315869067059420.so+0x3cbb74] rocksdb::DBIter::Next()+0x284 J 18876 org.rocksdb.RocksIterator.next0(J)V (0 bytes) @ 0x00007f1531d7eb73 [0x00007f1531d7eac0+0x00000000000000b3] J 23512 c2 alluxio.master.block.DefaultBlockMaster.validateBlocks(Ljava/util/function/Function;Z)V (320 bytes) @ 0x00007f1531021cb0 
[0x00007f1531021540+0x0000000000000770] j alluxio.master.file.DefaultFileSystemMaster.validateInodeBlocks(Z)V+11 j alluxio.master.file.BlockIntegrityChecker.heartbeat()V+8 j alluxio.heartbeat.HeartbeatThread.run()V+78 ``` The solution is: 1. Use a flag on RocksStore to indicate that the RocksDB will be closed/restart/wiped out, so all ongoing operations will abort voluntarily. This makes sure R/W operations do not block the closer, as much as possible. 2. Use reference counting on the RocksStore. A R/W operation will increment the ref count by one, and the closer must wait for the ref count to decrement to zero (or timeout). This makes sure the closer waits for R/W to abort or complete. So there will be no possibilty of crashing. ### Why are the changes needed? As explained above ### Does this PR introduce any user facing changes? The change should be mostly transparent to users. However, in some small corners the behavior can be different. Before this change, if the RocksDB is closed, some list operations may return partial results or crash. After this change, the operation will fail explicitly instead. ### Performance impact Impact on performance is further explained in https://github.com/Alluxio/alluxio/pull/17171#issuecomment-1494235026 In short, by removing the `synchronized` on `RocksStore` and switch to a thread safety model based on ref count, we should have better performance, especially when the concurrency is very high (in a large deployment with 32+ cores and 500 RPC threads). 
pr-link: Alluxio/alluxio#17171 change-id: cid-11d7930fb5c1a40ef2f47914bc6b5241f6fb8b81 --- .../file/cache/store/RocksPageStoreDir.java | 5 +- .../main/java/alluxio/conf/PropertyKey.java | 15 + .../alluxio/exception/ExceptionMessage.java | 10 + .../rocks/RocksExclusiveLockHandle.java | 48 ++ .../rocks/RocksSharedLockHandle.java | 59 ++ .../master/metastore/rocks/RocksUtils.java | 53 +- .../alluxio/resource/CloseableIterator.java | 2 +- .../master/block/DefaultBlockMaster.java | 4 + .../metastore/rocks/RocksBlockMetaStore.java | 90 ++- .../metastore/rocks/RocksCheckpointed.java | 28 +- .../metastore/rocks/RocksInodeStore.java | 170 +++-- .../master/metastore/rocks/RocksStore.java | 416 ++++++++++- .../rocks/RocksBlockMetaStoreTest.java | 275 +++++++ .../metastore/rocks/RocksInodeStoreTest.java | 703 +++++++++++++++++- .../metastore/rocks/RocksStoreTest.java | 489 +++++++++++- .../metastore/rocks/RocksStoreTestUtils.java | 29 + .../java/alluxio/inode/RocksBenchBase.java | 3 + 17 files changed, 2248 insertions(+), 151 deletions(-) create mode 100644 core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java create mode 100644 core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java create mode 100644 core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java index 628c9b386bf3..1f218ca56b44 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/RocksPageStoreDir.java @@ -66,8 +66,11 @@ public void reset() throws IOException { @Override public void scanPages(Consumer> pageInfoConsumer) { + 
// Fix thread safety on this iterator or demise RocksPageStore + // https://github.com/Alluxio/alluxio/issues/17131 try (CloseableIterator> pageIterator = - RocksUtils.createCloseableIterator(mPageStore.createNewInterator(), this::parsePageInfo)) { + RocksUtils.createCloseableIterator(mPageStore.createNewInterator(), this::parsePageInfo, + () -> null, null)) { Streams.stream(pageIterator).forEach(pageInfoConsumer); } } diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 07874168519f..dfdab6e892e9 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -2617,6 +2617,19 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT = + durationBuilder(Name.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT) + .setDefaultValue("10s") + .setIsHidden(true) + .setDescription("Before RocksDB is shut down/restarted/restored, Master will wait for " + "ongoing operations to complete/abort. This timeout specifies how long to wait " + "before forcing the action. Then the leftover operations will fail. Normally the " + "wait will be short, because when master fails over/shuts down/replays journal, " + "all other concurrent operations should have been stopped. This is just one extra " + "safety guard. 
Therefore we do not recommend setting this manually.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_METASTORE_ROCKS_PARALLEL_BACKUP = booleanBuilder(Name.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP) .setDefaultValue(false) @@ -8086,6 +8099,8 @@ public static final class Name { "alluxio.master.metastore.rocks.checkpoint.compression.level"; public static final String MASTER_METASTORE_ROCKS_CHECKPOINT_COMPRESSION_TYPE = "alluxio.master.metastore.rocks.checkpoint.compression.type"; + public static final String MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT = + "alluxio.master.metastore.rocks.exclusive.lock.timeout"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP = "alluxio.master.metastore.rocks.parallel.backup"; public static final String MASTER_METASTORE_ROCKS_PARALLEL_BACKUP_THREADS = diff --git a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java index ec304eb2e8f2..34ef67c9beef 100644 --- a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java +++ b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java @@ -192,6 +192,16 @@ public enum ExceptionMessage { // ufs maintenance UFS_OP_NOT_ALLOWED("Operation {0} not allowed on ufs path {1} under maintenance mode {2}"), + // RocksDB + ROCKS_DB_CLOSING("RocksDB is being closed because the master is under one of the following " + + "events: primary failover/shut down/checkpoint/journal replay"), + ROCKS_DB_REWRITTEN("RocksDB has been rewritten. Typically this is because the master is " + + "restored to a checkpoint."), + ROCKS_DB_EXCLUSIVE_LOCK_FORCED("RocksDB exclusive lock is forced with {0} ongoing " + + "r/w operations. There is a risk to crash!"), + ROCKS_DB_REF_COUNT_DIRTY("Some read/write operations did not respect the exclusive lock on " + + "the RocksStore and messed up the ref count! 
Current ref count is {0}."), + // SEMICOLON! minimize merge conflicts by putting it on its own line ; diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java new file mode 100644 index 000000000000..c742012466f8 --- /dev/null +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksExclusiveLockHandle.java @@ -0,0 +1,48 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import alluxio.exception.runtime.AlluxioRuntimeException; + +import java.util.concurrent.Callable; + +/** + * This is a handle used to manage the write lock(exclusive lock) on RocksStore. + * The exclusive lock is acquired when ref count is zero, and the StopServing flag ensures + * no new r/w will come in, so the ref count will stay zero throughout the period. + * + * One exception is when the exclusive lock is forced (ignoring uncompleted r/w operations), + * when the reader comes back the exclusive lock is already held. At this moment when the late + * reader comes back, it should not update the ref count anymore. See Javadoc on + * {@link RocksSharedLockHandle#close()} for how that is handled. + */ +public class RocksExclusiveLockHandle implements AutoCloseable { + private final Callable mCloseAction; + + /** + * The constructor. 
+ * @param closeAction the action called on close + */ + public RocksExclusiveLockHandle(Callable closeAction) { + mCloseAction = closeAction; + } + + @Override + public void close() { + try { + mCloseAction.call(); + } catch (Exception e) { + // From the current usage in RocksStore, this is unreachable + throw AlluxioRuntimeException.from(e); + } + } +} diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java new file mode 100644 index 000000000000..c38504caa6fc --- /dev/null +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksSharedLockHandle.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import java.util.concurrent.atomic.LongAdder; + +/** + * This is a handle used to manage a read lock(shared lock) on RocksStore. + * When the shared lock is held, exclusive locks will wait. That guarantees the RocksDB + * is not wiped out/closed while an r/w operation is active. + * + * RocksStore uses ref count for locking so releasing a read lock is just decrementing the + * reference count. + */ +public class RocksSharedLockHandle implements AutoCloseable { + private final int mDbVersion; + private final LongAdder mRefCount; + + /** + * The constructor. + * + * @param dbVersion The RocksDB version. This version is updated when the RocksDB + * is restored or wiped out. 
+ * @param refCount the ref count to decrement on close + */ + public RocksSharedLockHandle(int dbVersion, LongAdder refCount) { + mDbVersion = dbVersion; + mRefCount = refCount; + } + + /** + * Gets the version on the lock. + * @return version + */ + public int getLockVersion() { + return mDbVersion; + } + + @Override + public void close() { + /* + * If the exclusive lock has been forced and the ref count is reset, this reference will point + * to an out-of-date counter. Therefore, we can just update this counter without concerns. + * If the exclusive lock has NOT been forced, we decrement the ref count normally. + * If the exclusive lock has been forced, we decrement an irrelevant counter which will never + * be read. + */ + mRefCount.decrement(); + } +} diff --git a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java index 493787f00850..cdada2019182 100644 --- a/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java +++ b/core/common/src/main/java/alluxio/master/metastore/rocks/RocksUtils.java @@ -20,6 +20,7 @@ import java.util.Iterator; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; /** * Convenience methods for working with RocksDB. @@ -94,13 +95,31 @@ public interface RocksIteratorParser { * Used to wrap an {@link CloseableIterator} over {@link RocksIterator}. * It seeks given iterator to first entry before returning the iterator. * + * The Iterator is associated with a shared lock to the RocksStore. The lock should be acquired + * by the caller (See java doc on RocksStore.checkAndAcquireSharedLock()) for how. + * And the lock is held throughout the lifecycle of this iterator until it is closed + * either on completion or on exception. This shared lock guarantees thread safety when + * accessing the RocksDB. 
In other words, when this shared lock is held, the underlying + * RocksDB will not be stopped/restarted. + * + * The abortCheck defines a way to voluntarily abort the iteration. This typically happens + * when the underlying RocksDB will be closed/restart/checkpointed, where all accesses should + * be stopped. + * + * With the thread safety baked into hasNext() and next(), users of this Iterator do not need + * to worry about safety and can use this Iterator normally. + * See examples in how this iterator is used in RocksBlockMetaStore and RocksInodeStore. + * * @param rocksIterator the rocks iterator * @param parser parser to produce iterated values from rocks key-value * @param iterator value type + * @param abortCheck if true, abort the iteration + * @param rocksDbSharedLock the shared lock acquired by the iterator * @return wrapped iterator */ public static CloseableIterator createCloseableIterator( - RocksIterator rocksIterator, RocksIteratorParser parser) { + RocksIterator rocksIterator, RocksIteratorParser parser, + Supplier abortCheck, RocksSharedLockHandle rocksDbSharedLock) { rocksIterator.seekToFirst(); AtomicBoolean valid = new AtomicBoolean(true); Iterator iter = new Iterator() { @@ -111,23 +130,41 @@ public boolean hasNext() { @Override public T next() { + boolean succeeded = false; + + /* + * If the RocksDB wants to stop, abort the loop instead of finishing it. + * The abortCheck will throw an exception, which closes the CloseableIterator + * if the CloseableIterator is correctly put in a try-with-resource section. 
+ */ + abortCheck.get(); + try { - return parser.next(rocksIterator); + T result = parser.next(rocksIterator); + rocksIterator.next(); + succeeded = true; + return result; } catch (Exception exc) { LOG.warn("Iteration aborted because of error", exc); - rocksIterator.close(); - valid.set(false); throw new RuntimeException(exc); } finally { - rocksIterator.next(); - if (!rocksIterator.isValid()) { - rocksIterator.close(); + if (!succeeded) { valid.set(false); + rocksIterator.close(); } } } }; - return CloseableIterator.create(iter, (whatever) -> rocksIterator.close()); + return CloseableIterator.create(iter, (whatever) -> { + try { + rocksIterator.close(); + } finally { + if (rocksDbSharedLock != null) { + // Release the lock after recycling the iterator safely + rocksDbSharedLock.close(); + } + } + }); } } diff --git a/core/common/src/main/java/alluxio/resource/CloseableIterator.java b/core/common/src/main/java/alluxio/resource/CloseableIterator.java index b6f473ad44fd..c2ba6138e77f 100644 --- a/core/common/src/main/java/alluxio/resource/CloseableIterator.java +++ b/core/common/src/main/java/alluxio/resource/CloseableIterator.java @@ -43,7 +43,7 @@ public abstract class CloseableIterator extends CloseableResource * * @param iterator the resource to wrap */ - CloseableIterator(Iterator iterator) { + protected CloseableIterator(Iterator iterator) { super(iterator); mIter = iterator; } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index c5f740d1fbc7..b138b94c8f2b 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -482,6 +482,10 @@ public JournalEntry next() { if (!hasNext()) { throw new NoSuchElementException(); } + /* + * When the BlockStore is RocksBlockMetaStore, thread safety is embedded in the iterator. 
+ * So no need to worry if the RocksDB is closed while this iterator is active. + */ Block block = blockStoreIterator.next(); BlockInfoEntry blockInfoEntry = BlockInfoEntry.newBuilder().setBlockId(block.getId()) diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java index 2de8650c743b..a3b733767435 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksBlockMetaStore.java @@ -68,6 +68,10 @@ public class RocksBlockMetaStore implements BlockMetaStore, RocksCheckpointed { private static final String BLOCK_LOCATIONS_COLUMN = "block-locations"; private static final String ROCKS_STORE_NAME = "BlockStore"; + /* + * Below 3 fields are created and managed by the external user class, + * no need to close in this class + */ // This is a field instead of a constant because it depends on the call to RocksDB.loadLibrary(). 
private final WriteOptions mDisableWAL; private final ReadOptions mIteratorOption; @@ -76,7 +80,9 @@ public class RocksBlockMetaStore implements BlockMetaStore, RocksCheckpointed { private final List mToClose = new ArrayList<>(); private final RocksStore mRocksStore; - // The handles are closed in RocksStore + /* + * The ColumnFamilyHandle instances are created and closed in RocksStore + */ private final AtomicReference mBlockMetaColumn = new AtomicReference<>(); private final AtomicReference mBlockLocationsColumn = new AtomicReference<>(); private final LongAdder mSize = new LongAdder(); @@ -90,11 +96,14 @@ public RocksBlockMetaStore(String baseDir) { RocksDB.loadLibrary(); // the rocksDB objects must be initialized after RocksDB.loadLibrary() is called mDisableWAL = new WriteOptions().setDisableWAL(true); + mToClose.add(mDisableWAL); mReadPrefixSameAsStart = new ReadOptions().setPrefixSameAsStart(true); + mToClose.add(mReadPrefixSameAsStart); mIteratorOption = new ReadOptions() .setReadaheadSize(Configuration.getBytes( PropertyKey.MASTER_METASTORE_ITERATOR_READAHEAD_SIZE)) .setTotalOrderSeek(true); + mToClose.add(mIteratorOption); List columns = new ArrayList<>(); DBOptions opts = new DBOptions(); @@ -266,7 +275,7 @@ && new String(columns.get(2).getName()).equals(BLOCK_LOCATIONS_COLUMN), } private long getProperty(String rocksPropertyName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { return db().getAggregatedLongProperty(rocksPropertyName); } catch (RocksDBException e) { LOG.warn(String.format("error collecting %s", rocksPropertyName), e); @@ -277,7 +286,7 @@ private long getProperty(String rocksPropertyName) { @Override public Optional getBlock(long id) { byte[] meta; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { meta = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -294,7 +303,7 @@ public Optional 
getBlock(long id) { @Override public void putBlock(long id, BlockMeta meta) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] buf = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); // Overwrites the key if it already exists. db().put(mBlockMetaColumn.get(), mDisableWAL, Longs.toByteArray(id), meta.toByteArray()); @@ -309,7 +318,7 @@ public void putBlock(long id, BlockMeta meta) { @Override public void removeBlock(long id) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] buf = db().get(mBlockMetaColumn.get(), Longs.toByteArray(id)); db().delete(mBlockMetaColumn.get(), mDisableWAL, Longs.toByteArray(id)); if (buf != null) { @@ -323,8 +332,14 @@ public void removeBlock(long id) { @Override public void clear() { - mSize.reset(); - mRocksStore.clear(); + LOG.info("Waiting to clear RocksBlockMetaStore.."); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForRewrite()) { + LOG.info("Clearing RocksDB"); + mSize.reset(); + mRocksStore.clear(); + } + // Reset the DB state and prepare to serve again + LOG.info("RocksBlockMetaStore cleared and ready to serve again"); } @Override @@ -333,17 +348,23 @@ public long size() { } @Override + /** + * There may be concurrent readers and writers so we have to guarantee thread safety when + * closing the RocksDB and all RocksObject instances. The sequence for closing is: + * 1. Mark flag mClosed = true without locking. + * All new readers/writers should see the flag and not start the operation. + * 2. Acquire the WriteLock before shutting down, so it waits for all concurrent r/w to + * bail or finish. 
+ */ public void close() { - mSize.reset(); - LOG.info("Closing RocksBlockStore and recycling all RocksDB JNI objects"); - mRocksStore.close(); - mIteratorOption.close(); - mDisableWAL.close(); - mReadPrefixSameAsStart.close(); - // Close the elements in the reverse order they were added - Collections.reverse(mToClose); - mToClose.forEach(RocksObject::close); - LOG.info("RocksBlockStore closed"); + LOG.info("RocksBlockStore is being closed"); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForClosing()) { + mSize.reset(); + mRocksStore.close(); + // Close the elements in the reverse order they were added + Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + } } @Override @@ -355,8 +376,11 @@ public List getLocations(long id) { // When there are multiple resources declared in the try-with-resource block // They are closed in the opposite order of declaration // Ref: https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html - try (final RocksIterator iter = db().newIterator(mBlockLocationsColumn.get(), - mReadPrefixSameAsStart)) { + // We assume this operation is short (one block cannot have too many locations) + // and lock the full iteration + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + final RocksIterator iter = db().newIterator(mBlockLocationsColumn.get(), + mReadPrefixSameAsStart)) { iter.seek(Longs.toByteArray(id)); List locations = new ArrayList<>(); for (; iter.isValid(); iter.next()) { @@ -373,7 +397,7 @@ public List getLocations(long id) { @Override public void addLocation(long id, BlockLocation location) { byte[] key = RocksUtils.toByteArray(id, location.getWorkerId()); - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mBlockLocationsColumn.get(), mDisableWAL, key, location.toByteArray()); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -383,7 +407,7 @@ public void addLocation(long id, BlockLocation location) 
{ @Override public void removeLocation(long blockId, long workerId) { byte[] key = RocksUtils.toByteArray(blockId, workerId); - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().delete(mBlockLocationsColumn.get(), mDisableWAL, key); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -391,10 +415,28 @@ public void removeLocation(long blockId, long workerId) { } @Override + /** + * Acquires an iterator to iterate all Blocks in RocksDB. + * A shared lock will be acquired when this iterator is created, and released when: + * 1. This iterator is complete. + * 2. At each step, the iterator finds the RocksDB is closing and aborts voluntarily. + * + * This iterator is used in: + * 1. {@link BlockIntegrityChecker} to iterate all existing blocks + * 2. Journal dumping like checkpoint/backup sequences + */ public CloseableIterator getCloseableIterator() { - RocksIterator iterator = db().newIterator(mBlockMetaColumn.get(), mIteratorOption); - return RocksUtils.createCloseableIterator(iterator, - (iter) -> new Block(Longs.fromByteArray(iter.key()), BlockMeta.parseFrom(iter.value()))); + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + + RocksIterator iterator = db().newIterator(mBlockMetaColumn.get(), mIteratorOption); + return RocksUtils.createCloseableIterator(iterator, + (iter) -> new Block(Longs.fromByteArray(iter.key()), BlockMeta.parseFrom(iter.value())), + () -> { + mRocksStore.shouldAbort(lock.getLockVersion()); + return null; + }, readLock); + } } private RocksDB db() { diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java index cdea4e092f10..959dc13963c8 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java +++ 
b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksCheckpointed.java @@ -39,21 +39,25 @@ default CompletableFuture writeToCheckpoint(File directory, ExecutorService executorService) { return CompletableFuture.runAsync(() -> { LOG.debug("taking {} snapshot started", getCheckpointName()); - File subDir = new File(directory, getCheckpointName().toString()); - try { - getRocksStore().writeToCheckpoint(subDir); - } catch (RocksDBException e) { - throw new AlluxioRuntimeException(Status.INTERNAL, - String.format("Failed to take snapshot %s in dir %s", getCheckpointName(), directory), - e, ErrorType.Internal, false); + try (RocksExclusiveLockHandle lock = getRocksStore().lockForCheckpoint()) { + File subDir = new File(directory, getCheckpointName().toString()); + try { + getRocksStore().writeToCheckpoint(subDir); + } catch (RocksDBException e) { + throw new AlluxioRuntimeException(Status.INTERNAL, + String.format("Failed to take snapshot %s in dir %s", getCheckpointName(), directory), + e, ErrorType.Internal, false); + } + LOG.debug("taking {} snapshot finished", getCheckpointName()); } - LOG.debug("taking {} snapshot finished", getCheckpointName()); }, executorService); } @Override default void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { - getRocksStore().writeToCheckpoint(output); + try (RocksExclusiveLockHandle lock = getRocksStore().lockForCheckpoint()) { + getRocksStore().writeToCheckpoint(output); + } } @Override @@ -62,7 +66,7 @@ default CompletableFuture restoreFromCheckpoint(File directory, return CompletableFuture.runAsync(() -> { LOG.debug("loading {} snapshot started", getCheckpointName()); File subDir = new File(directory, getCheckpointName().toString()); - try { + try (RocksExclusiveLockHandle lock = getRocksStore().lockForRewrite()) { getRocksStore().restoreFromCheckpoint(subDir); } catch (Exception e) { throw new AlluxioRuntimeException(Status.INTERNAL, @@ -75,6 +79,8 @@ default CompletableFuture 
restoreFromCheckpoint(File directory, @Override default void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { - getRocksStore().restoreFromCheckpoint(input); + try (RocksExclusiveLockHandle lock = getRocksStore().lockForRewrite()) { + getRocksStore().restoreFromCheckpoint(input); + } } } diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java index c8162df5e6af..e785c67559a0 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksInodeStore.java @@ -63,6 +63,7 @@ import java.util.Spliterators; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -80,6 +81,10 @@ public class RocksInodeStore implements InodeStore, RocksCheckpointed { private static final String EDGES_COLUMN = "edges"; private static final String ROCKS_STORE_NAME = "InodeStore"; + /* + * Below 3 fields are created and managed by the external user class, + * no need to close in this class. + */ // These are fields instead of constants because they depend on the call to RocksDB.loadLibrary(). 
private final WriteOptions mDisableWAL; private final ReadOptions mReadPrefixSameAsStart; @@ -93,6 +98,9 @@ public class RocksInodeStore implements InodeStore, RocksCheckpointed { private final RocksStore mRocksStore; private final List mToClose = new ArrayList<>(); + /* + * The ColumnFamilyHandle instances are created and closed in RocksStore + */ private final AtomicReference mInodesColumn = new AtomicReference<>(); private final AtomicReference mEdgesColumn = new AtomicReference<>(); @@ -105,10 +113,13 @@ public RocksInodeStore(String baseDir) { RocksDB.loadLibrary(); // the rocksDB objects must be initialized after RocksDB.loadLibrary() is called mDisableWAL = new WriteOptions().setDisableWAL(true); + mToClose.add(mDisableWAL); mReadPrefixSameAsStart = new ReadOptions().setPrefixSameAsStart(true); + mToClose.add(mReadPrefixSameAsStart); mIteratorOption = new ReadOptions().setReadaheadSize( Configuration.getBytes(PropertyKey.MASTER_METASTORE_ITERATOR_READAHEAD_SIZE)) .setTotalOrderSeek(true); + mToClose.add(mIteratorOption); String dbPath = PathUtils.concatPath(baseDir, INODES_DB_NAME); String backupPath = PathUtils.concatPath(baseDir, INODES_DB_NAME + "-backup"); @@ -277,7 +288,7 @@ && new String(columns.get(2).getName()).equals(EDGES_COLUMN), } private long getProperty(String rocksPropertyName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { return db().getAggregatedLongProperty(rocksPropertyName); } catch (RocksDBException e) { LOG.warn(String.format("error collecting %s", rocksPropertyName), e); @@ -287,7 +298,7 @@ private long getProperty(String rocksPropertyName) { @Override public void remove(Long inodeId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { byte[] id = Longs.toByteArray(inodeId); db().delete(mInodesColumn.get(), mDisableWAL, id); } catch (RocksDBException e) { @@ -297,7 +308,7 @@ public void remove(Long inodeId) { @Override public void writeInode(MutableInode 
inode) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mInodesColumn.get(), mDisableWAL, Longs.toByteArray(inode.getId()), inode.toProto().toByteArray()); } catch (RocksDBException e) { @@ -312,12 +323,18 @@ public WriteBatch createWriteBatch() { @Override public void clear() { - mRocksStore.clear(); + LOG.info("Waiting to clear RocksInodeStore.."); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForRewrite()) { + LOG.info("Clearing RocksDB"); + mRocksStore.clear(); + } + // Reset the DB state and prepare to serve again + LOG.info("RocksInodeStore cleared and ready to serve again"); } @Override public void addChild(long parentId, String childName, Long childId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().put(mEdgesColumn.get(), mDisableWAL, RocksUtils.toByteArray(parentId, childName), Longs.toByteArray(childId)); } catch (RocksDBException e) { @@ -327,7 +344,7 @@ public void addChild(long parentId, String childName, Long childId) { @Override public void removeChild(long parentId, String name) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().delete(mEdgesColumn.get(), mDisableWAL, RocksUtils.toByteArray(parentId, name)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -337,7 +354,7 @@ public void removeChild(long parentId, String name) { @Override public Optional> getMutable(long id, ReadOption option) { byte[] inode; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { inode = db().get(mInodesColumn.get(), Longs.toByteArray(id)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -354,37 +371,54 @@ public Optional> getMutable(long id, ReadOption option) { @Override public CloseableIterator getChildIds(Long inodeId, ReadOption option) { - RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart); - // first seek to the correct 
bucket - iter.seek(Longs.toByteArray(inodeId)); - // now seek to a specific file if needed - String prefix = option.getPrefix(); - String fromName = option.getStartFrom(); - String seekTo; - if (fromName != null && prefix != null) { - if (fromName.compareTo(prefix) > 0) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart); + // first seek to the correct bucket + iter.seek(Longs.toByteArray(inodeId)); + // now seek to a specific file if needed + String prefix = option.getPrefix(); + String fromName = option.getStartFrom(); + String seekTo; + if (fromName != null && prefix != null) { + if (fromName.compareTo(prefix) > 0) { + seekTo = fromName; + } else { + seekTo = prefix; + } + } else if (fromName != null) { seekTo = fromName; } else { seekTo = prefix; } - } else if (fromName != null) { - seekTo = fromName; - } else { - seekTo = prefix; - } - if (seekTo != null && seekTo.length() > 0) { - iter.seek(RocksUtils.toByteArray(inodeId, seekTo)); + if (seekTo != null && seekTo.length() > 0) { + iter.seek(RocksUtils.toByteArray(inodeId, seekTo)); + } + /* + * Acquire a second lock for iteration, instead of using the same lock for initialization. + * Because init takes many operations and should be protected by try-with-resource. + * This is fine because the shared lock is reentrant. 
+ */ + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + RocksIter rocksIter = new RocksIter(iter, prefix, () -> { + mRocksStore.shouldAbort(readLock.getLockVersion()); + return null; + }); + Stream idStream = StreamSupport.stream(Spliterators + .spliteratorUnknownSize(rocksIter, Spliterator.ORDERED), false); + return CloseableIterator.create(idStream.iterator(), (any) -> { + try { + iter.close(); + } finally { + readLock.close(); + } + }); } - RocksIter rocksIter = new RocksIter(iter, prefix); - Stream idStream = StreamSupport.stream(Spliterators - .spliteratorUnknownSize(rocksIter, Spliterator.ORDERED), false); - return CloseableIterator.create(idStream.iterator(), (any) -> iter.close()); } @Override public Optional getChildId(Long inodeId, String name, ReadOption option) { byte[] id; - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { id = db().get(mEdgesColumn.get(), RocksUtils.toByteArray(inodeId, name)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -400,8 +434,10 @@ static class RocksIter implements Iterator { final RocksIterator mIter; boolean mStopped = false; final byte[] mPrefix; + Supplier mAbortCheck; - RocksIter(RocksIterator rocksIterator, @Nullable String prefix) { + RocksIter(RocksIterator rocksIterator, @Nullable String prefix, + Supplier abortCheck) { mIter = rocksIterator; if (prefix != null && prefix.length() > 0) { mPrefix = prefix.getBytes(); @@ -409,6 +445,7 @@ static class RocksIter implements Iterator { mPrefix = null; } checkPrefix(); + mAbortCheck = abortCheck; } private void checkPrefix() { @@ -434,6 +471,8 @@ public boolean hasNext() { @Override public Long next() { + // Abort the operation if RocksDB stops serving + mAbortCheck.get(); Long l = Longs.fromByteArray(mIter.value()); mIter.next(); checkPrefix(); @@ -443,6 +482,7 @@ public Long next() { @Override public Optional getChild(Long inodeId, String name, ReadOption option) { + // The underlying calls 
should each handle locking internally return getChildId(inodeId, name).flatMap(id -> { Optional child = get(id); if (!child.isPresent()) { @@ -455,7 +495,8 @@ public Optional getChild(Long inodeId, String name, ReadOption option) { @Override public boolean hasChildren(InodeDirectoryView inode, ReadOption option) { - try (RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mReadPrefixSameAsStart)) { iter.seek(Longs.toByteArray(inode.getId())); return iter.isValid(); } @@ -464,10 +505,11 @@ public boolean hasChildren(InodeDirectoryView inode, ReadOption option) { @Override public Set allEdges() { Set edges = new HashSet<>(); - try (RocksIterator iter = db().newIterator(mEdgesColumn.get(), - mIteratorOption)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mEdgesColumn.get(), mIteratorOption)) { iter.seekToFirst(); while (iter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); long parentId = RocksUtils.readLong(iter.key(), 0); String childName = new String(iter.key(), Longs.BYTES, iter.key().length - Longs.BYTES); long childId = Longs.fromByteArray(iter.value()); @@ -481,10 +523,11 @@ public Set allEdges() { @Override public Set> allInodes() { Set> inodes = new HashSet<>(); - try (RocksIterator iter = db().newIterator(mInodesColumn.get(), - mIteratorOption)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator iter = db().newIterator(mInodesColumn.get(), mIteratorOption)) { iter.seekToFirst(); while (iter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); inodes.add(getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get()); iter.next(); } @@ -493,14 +536,28 @@ public Set> allInodes() { } /** - * The name is intentional, in order to distinguish from 
the {@code Iterable} interface. + * Acquires an iterator to iterate all Inodes in RocksDB. + * A shared lock will be acquired when this iterator is created, and released when: + * 1. This iterator is complete. + * 2. At each step, the iterator finds the RocksDB is closing and aborts voluntarily. + * + * Except tests, this iterator is only used in: + * 1. {@link alluxio.master.journal.tool.AbstractJournalDumper} which translates RocksDB + * checkpoints to a human-readable form. * * @return an iterator over stored inodes */ public CloseableIterator getCloseableIterator() { - return RocksUtils.createCloseableIterator( - db().newIterator(mInodesColumn.get(), mIteratorOption), - (iter) -> getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get()); + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { + RocksSharedLockHandle readLock = mRocksStore.checkAndAcquireSharedLock(); + return RocksUtils.createCloseableIterator( + db().newIterator(mInodesColumn.get(), mIteratorOption), + (iter) -> getMutable(Longs.fromByteArray(iter.key()), ReadOption.defaults()).get(), + () -> { + mRocksStore.shouldAbort(lock.getLockVersion()); + return null; + }, readLock); + } } @Override @@ -523,7 +580,7 @@ private class RocksWriteBatch implements WriteBatch { @Override public void writeInode(MutableInode inode) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.put(mInodesColumn.get(), Longs.toByteArray(inode.getId()), inode.toProto().toByteArray()); } catch (RocksDBException e) { @@ -533,7 +590,7 @@ public void writeInode(MutableInode inode) { @Override public void removeInode(Long key) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.delete(mInodesColumn.get(), Longs.toByteArray(key)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -542,7 +599,7 @@ public void removeInode(Long key) { @Override public void addChild(Long parentId, String 
childName, Long childId) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.put(mEdgesColumn.get(), RocksUtils.toByteArray(parentId, childName), Longs.toByteArray(childId)); } catch (RocksDBException e) { @@ -552,7 +609,7 @@ public void addChild(Long parentId, String childName, Long childId) { @Override public void removeChild(Long parentId, String childName) { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { mBatch.delete(mEdgesColumn.get(), RocksUtils.toByteArray(parentId, childName)); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -561,7 +618,7 @@ public void removeChild(Long parentId, String childName) { @Override public void commit() { - try { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock()) { db().write(mDisableWAL, mBatch); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -576,14 +633,13 @@ public void close() { @Override public void close() { - LOG.info("Closing RocksInodeStore and recycling all RocksDB JNI objects"); - mRocksStore.close(); - mDisableWAL.close(); - mReadPrefixSameAsStart.close(); - // Close the elements in the reverse order they were added - Collections.reverse(mToClose); - mToClose.forEach(RocksObject::close); - LOG.info("RocksInodeStore closed"); + LOG.info("RocksInodeStore is being closed"); + try (RocksExclusiveLockHandle lock = mRocksStore.lockForClosing()) { + mRocksStore.close(); + // Close the elements in the reverse order they were added + Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + } } private RocksDB db() { @@ -596,10 +652,12 @@ private RocksDB db() { */ public String toStringEntries() { StringBuilder sb = new StringBuilder(); - try (ReadOptions readOptions = new ReadOptions().setTotalOrderSeek(true); - RocksIterator inodeIter = db().newIterator(mInodesColumn.get(), readOptions)) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + 
ReadOptions readOptions = new ReadOptions().setTotalOrderSeek(true); + RocksIterator inodeIter = db().newIterator(mInodesColumn.get(), readOptions)) { inodeIter.seekToFirst(); while (inodeIter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); MutableInode inode; try { inode = MutableInode.fromProto(InodeMeta.Inode.parseFrom(inodeIter.value())); @@ -611,9 +669,11 @@ public String toStringEntries() { inodeIter.next(); } } - try (RocksIterator edgeIter = db().newIterator(mEdgesColumn.get())) { + try (RocksSharedLockHandle lock = mRocksStore.checkAndAcquireSharedLock(); + RocksIterator edgeIter = db().newIterator(mEdgesColumn.get())) { edgeIter.seekToFirst(); while (edgeIter.isValid()) { + mRocksStore.shouldAbort(lock.getLockVersion()); byte[] key = edgeIter.key(); byte[] id = new byte[Longs.BYTES]; byte[] name = new byte[key.length - Longs.BYTES]; @@ -629,6 +689,8 @@ public String toStringEntries() { /** * A testing only method to access the internal objects. + * For simplicity, no thread safety is provided on the escaping objects. 
+ * * @return the RocksDB objects references the InodesColumn */ @VisibleForTesting diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java index fe83506adea9..766e9e7516fc 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java @@ -14,14 +14,19 @@ import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.journal.checkpoint.CheckpointOutputStream; import alluxio.master.journal.checkpoint.CheckpointType; +import alluxio.retry.CountingRetry; import alluxio.retry.TimeoutRetry; +import alluxio.util.SleepUtils; import alluxio.util.compression.ParallelZipUtils; import alluxio.util.compression.TarUtils; import alluxio.util.io.FileUtils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.commons.io.IOUtils; import org.rocksdb.BlockBasedTableConfig; @@ -47,41 +52,128 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicReference; -import javax.annotation.concurrent.ThreadSafe; +import java.util.concurrent.atomic.AtomicStampedReference; +import java.util.concurrent.atomic.LongAdder; +import javax.annotation.concurrent.NotThreadSafe; /** * Class for managing a rocksdb database. 
This class handles common functionality such as * initializing the database and performing database backup/restore. * - * Thread safety is achieved by synchronizing all public methods. + * This class provides locking methods for the callers. And the thread safety of RocksDB + * relies on the caller to use the corresponding lock methods. + * The reasons why this class only provides thread safety utilities to the callers + * (instead of wrapping it under each call) are: + * 1. Callers like RocksInodeStore and RocksBlockMetaStore have specific read/write logic + * like iteration, which cannot be abstracted and locked internally in this class. + * 2. With locking methods provided by this class, callers like RocksInodeStore + * can actually reuse the locks to perform concurrency control on their own logic. + * + * For reading/writing on the RocksDB, use the shared lock + *
+ *   try (RocksSharedLockHandle r = mRocksStore.checkAndAcquireSharedLock() {
+ *     // perform your read/write operation
+ *   }
+ * 
+ * + * For operations like closing/restart/restoring on the RocksDB, an exclusive lock should + * be acquired by calling one of: + * 1. {@link #lockForClosing()} + * 2. {@link #lockForRewrite()} + * 3. {@link #lockForCheckpoint()} + * + * Rule of thumb: + * 1. Use the proper locking methods when you access RocksDB. + * 2. Make each operation short. Make the locked section short. + * 3. If you have to make the operation long (like iteration), utilize {@link #shouldAbort(int)} + * to check and abort voluntarily. + * See Javadoc on the locking methods for details. */ -@ThreadSafe +@NotThreadSafe public final class RocksStore implements Closeable { private static final Logger LOG = LoggerFactory.getLogger(RocksStore.class); public static final int ROCKS_OPEN_RETRY_TIMEOUT = 20 * Constants.SECOND_MS; + public static final Duration ROCKS_CLOSE_WAIT_TIMEOUT = + Configuration.getDuration(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT); + private static final boolean TEST_MODE = Configuration.getBoolean(PropertyKey.TEST_MODE); + private final String mName; private final String mDbPath; private final String mDbCheckpointPath; private final Integer mParallelBackupPoolSize; - private final Collection mColumnFamilyDescriptors; - private final DBOptions mDbOpts; private final int mCompressLevel = Configuration.getInt( PropertyKey.MASTER_EMBEDDED_JOURNAL_SNAPSHOT_REPLICATION_COMPRESSION_LEVEL); private final boolean mParallelBackup = Configuration.getBoolean( PropertyKey.MASTER_METASTORE_ROCKS_PARALLEL_BACKUP); - private RocksDB mDb; - private Checkpoint mCheckpoint; - // When we create the database, we must set these handles. + /* + * Below 2 fields are created and managed by the external user class, + * no need to close in this class. + */ + private final Collection mColumnFamilyDescriptors; + private final DBOptions mDbOpts; + /* + * Below 3 fields are created and managed internally to this class, + * must be closed in this class. 
+ */ + private volatile RocksDB mDb; + private volatile Checkpoint mCheckpoint; private final List> mColumnHandles; + /* + * The state consists of two information. + * + * The boolean flag indicates whether the RocksDB wants to stop serving. + * TRUE - Stop serving + * FALSE - Serving normally + * + * The version number indicates whether the RocksDB has been rewritten. + * If the RocksDB is restored or wiped out, the version number goes up. + * If the RocksDB is paused just to dump a checkpoint, the version number is kept the same. + * A reader can rely on the version to tell whether it can still read the RocksDB + * after the exclusive lock is taken and released. + */ + public final AtomicStampedReference mRocksDbStopServing = + new AtomicStampedReference<>(false, 0); + public volatile LongAdder mRefCount = new LongAdder(); + + /* + * Normally, the ref count will still be zero when the exclusive lock is held because: + * 1. If the exclusive lock was not forced, that means the ref count has decremented to zero + * before the exclusive lock was taken. And while the exclusive lock was held, no readers + * was able to come in and increment the ref count. + * 2. If the exclusive lock was forced, the old ref count instance was thrown away. + * So even if there were a slow reader, that would not touch the new ref count incorrectly. + * Therefore, the new ref count should stay zero. + * + * However, we still added this sanity check as a canary for incorrect ref count usages. 
+ */ + private final Callable mCheckRefCount = () -> { + long refCount = getSharedLockCount(); + if (TEST_MODE) { + // In test mode we enforce strict ref count check, as a canary for ref count issues + Preconditions.checkState(refCount == 0, + ExceptionMessage.ROCKS_DB_REF_COUNT_DIRTY.getMessage(refCount)); + } else { + // In a real deployment, we forgive potential ref count problems and take the risk + if (refCount != 0) { + LOG.warn(ExceptionMessage.ROCKS_DB_REF_COUNT_DIRTY.getMessage(refCount)); + } + resetRefCounter(); + } + return null; + }; + /** * @param name a name to distinguish what store this is * @param dbPath a path for the rocks database @@ -102,7 +194,8 @@ public RocksStore(String name, String dbPath, String checkpointPath, DBOptions d mColumnFamilyDescriptors = columnFamilyDescriptors; mDbOpts = dbOpts; mColumnHandles = columnHandles; - try { + LOG.info("Resetting RocksDB for {} on init", name); + try (RocksExclusiveLockHandle lock = lockForRewrite()) { resetDb(); } catch (RocksDBException e) { throw new RuntimeException(e); @@ -110,17 +203,20 @@ public RocksStore(String name, String dbPath, String checkpointPath, DBOptions d } /** + * Requires the caller to acquire a shared lock by calling {@link #checkAndAcquireSharedLock()}. + * * @return the underlying rocksdb instance. The instance changes when clear() is called, so if the * caller caches the returned db, they must reset it after calling clear() */ - public synchronized RocksDB getDb() { + public RocksDB getDb() { return mDb; } /** * Clears and re-initializes the database. + * Requires the caller to acquire exclusive lock by calling {@link #lockForRewrite()}. */ - public synchronized void clear() { + public void clear() { try { resetDb(); } catch (RocksDBException e) { @@ -206,10 +302,11 @@ public synchronized void writeToCheckpoint(File directory) throws RocksDBExcepti /** * Writes a checkpoint of the database's content to the given output stream. 
+ * Requires the caller to acquire an exclusive lock by calling {@link #lockForCheckpoint()}. * * @param output the stream to write to */ - public synchronized void writeToCheckpoint(OutputStream output) + public void writeToCheckpoint(OutputStream output) throws IOException, InterruptedException { LOG.info("Creating rocksdb checkpoint at {}", mDbCheckpointPath); long startNano = System.nanoTime(); @@ -259,10 +356,11 @@ public synchronized void restoreFromCheckpoint(File directory) /** * Restores the database from a checkpoint. + * Requires the caller to acquire an exclusive lock by calling {@link #lockForRewrite()}. * * @param input the checkpoint stream to restore from */ - public synchronized void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { + public void restoreFromCheckpoint(CheckpointInputStream input) throws IOException { LOG.info("Restoring rocksdb from checkpoint"); long startNano = System.nanoTime(); Preconditions.checkState(input.getType() == CheckpointType.ROCKS_SINGLE @@ -274,7 +372,7 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro if (input.getType() == CheckpointType.ROCKS_PARALLEL) { List tmpDirs = Configuration.getList(PropertyKey.TMP_DIRS); String tmpZipFilePath = new File(tmpDirs.get(0), "alluxioRockStore-" + UUID.randomUUID()) - .getPath(); + .getPath(); try { try (FileOutputStream fos = new FileOutputStream(tmpZipFilePath)) { @@ -282,7 +380,7 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro } ParallelZipUtils.decompress(Paths.get(mDbPath), tmpZipFilePath, - mParallelBackupPoolSize); + mParallelBackupPoolSize); FileUtils.deletePathRecursively(tmpZipFilePath); } catch (Exception e) { @@ -299,11 +397,14 @@ public synchronized void restoreFromCheckpoint(CheckpointInputStream input) thro throw new IOException(e); } LOG.info("Restored rocksdb checkpoint in {}ms", - (System.nanoTime() - startNano) / Constants.MS_NANO); + (System.nanoTime() - startNano) / 
Constants.MS_NANO); } @Override - public synchronized void close() { + /** + * Requires the caller to acquire exclusive lock by calling {@link #lockForClosing()}. + */ + public void close() { stopDb(); LOG.info("Closed store at {}", mDbPath); } @@ -374,4 +475,285 @@ private static IndexType toRocksIndexType( throw new IllegalArgumentException(String.format("Unknown IndexType %s", index)); } } + + /** + * This is the core logic of the shared lock mechanism. + * + * Before any r/w operation on the RocksDB, acquire a shared lock with this method. + * The shared lock guarantees the RocksDB will not be restarted/cleared during the + * r/w access. In other words, similar to a read-write lock, exclusive lock requests + * will wait for shared locks to be released first. + * + * However, note that exclusive lock acquisition only waits for a certain period of time, + * defined by {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT}. + * After this timeout, the exclusive lock will be forced, and the shared lock holders + * are disrespected. Normally, the r/w operation should either complete or abort within + * seconds so the timeout {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT} + * should not be exceeded at all. + * + * @return a shared lock handle used to manage and close the shared lock + */ + public RocksSharedLockHandle checkAndAcquireSharedLock() { + if (mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + /* + * The lock action is merely incrementing the lock so it is very fast + * The closer will respect the ref count and only close when the ref count is zero + */ + mRefCount.increment(); + + /* + * Need to check the flag again to PREVENT the sequence of events below: + * 1. Reader checks flag + * 2. Closer sets flag + * 3. Closer sees refCount=0 + * 4. Reader increments refCount + * 5. Closer closes RocksDB + * 6. 
Reader reads RocksDB and incurs a segfault + * + * With the 2nd check, we make sure the ref count will be respected by the closer and + * the closer will therefore wait for this reader to complete/abort. + */ + if (mRocksDbStopServing.getReference()) { + mRefCount.decrement(); + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + + return new RocksSharedLockHandle(mRocksDbStopServing.getStamp(), mRefCount); + } + + /** + * This is the core logic of the exclusive lock mechanism. + * + * The exclusive lock will first set a flag and then wait for all shared lock holders to + * complete/abort. The time to wait is defined by + * {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT}. + * When the r/w operations observe this flag by {@link #shouldAbort(int)}, + * the operation will be aborted and the shared lock will be released. + * Some short operations do not check the {@link #shouldAbort(int)} because we expect + * them to finish fast. + * + * Normally, the default value of this timeout is long enough. + * However, if the ref count is still not zero after this wait, the exclusive lock will + * be forced and some warnings will be logged. There are multiple possibilities: + * 1. There is a very slow r/w operation. + * 2. Some r/w operation somewhere are not following the rules. + * 3. There is a bug somewhere, and the ref count is incorrect. + * In either case, submit an issue to https://github.com/Alluxio/alluxio/issues + * And we do not recommend tuning + * {@link PropertyKey#MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT} + * because it usually just covers the real issue. + * + * There are 4 cases where the exclusive lock is acquired: + * 1. The master is closing (and the process will exit). + * 2. The RocksDB will be cleared. This happens when the master process starts or in a failover. + * 3. The master is just dumping a checkpoint, where the RocksDB contents will not change. + * 4. 
The master is restoring from a checkpoint/backup where the RocksDB is rebuilt. + * + * When the master is closing, it will not wait for an ongoing checkpoint/restore/clear + * operation and will just grab the lock even though the exclusive lock is taken. + * Then the master process will exit and whatever operation will be aborted. + * This covers case 1 and yieldToAnotherCloser=false. + * + * In case 2, 3 or 4, we let the later closer(writer) fail. It will be the caller's + * responsibility to either retry or abort. In other words, when yieldToAnotherClose=true, + * the one who sets the mState will succeed and the other one will fail. + * + * @param yieldToAnotherCloser if true, the operation will fail if it observes a concurrent + * action on the exclusive lock + */ + private void setFlagAndBlockingWait(boolean yieldToAnotherCloser) { + // Another known operation has acquired the exclusive lock + if (yieldToAnotherCloser && mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + + int version = mRocksDbStopServing.getStamp(); + if (yieldToAnotherCloser) { + if (!mRocksDbStopServing.compareAndSet(false, true, version, version)) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } + } else { + // Just set the state with no respect to concurrent actions + mRocksDbStopServing.set(true, version); + } + + /* + * Wait until: + * 1. Ref count is zero, meaning all concurrent r/w have completed or aborted + * 2. Timeout is reached, meaning we force close/restart without waiting + * + * According to Java doc + * https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/atomic/LongAdder.html + * In absence of concurrent updates, sum() returns an accurate result. + * But sum() does not see concurrent updates and therefore can miss an update. 
+ * + * The correctness then relies on the 2nd check in checkAndAcquireSharedLock() + * because the reader will see the flag and just abort voluntarily. An example sequence + * of events is like below: + * 1. Reader checks flag, the flag is not set by the closer + * 2. Closer sets flag + * 3. Closer sees refCount=0 + * 4. Reader increments refCount + * 5. Closer closes RocksDB + * 6. Reader checks flag again and sees the flag + * 7. Reader decrements refCount aborts in checkAndAcquireSharedLock() + */ + Instant waitStart = Instant.now(); + CountingRetry retry = new CountingRetry((int) ROCKS_CLOSE_WAIT_TIMEOUT.getSeconds() * 10); + while (mRefCount.sum() != 0 && retry.attempt()) { + SleepUtils.sleepMs(100); + } + Duration elapsed = Duration.between(waitStart, Instant.now()); + LOG.info("Waited {}ms for ongoing read/write to complete/abort", elapsed.toMillis()); + + /* + * Reset the ref count to forget about the aborted operations + */ + long unclosedOperations = mRefCount.sum(); + if (unclosedOperations != 0) { + if (Configuration.getBoolean(PropertyKey.TEST_MODE)) { + throw new RuntimeException(ExceptionMessage.ROCKS_DB_EXCLUSIVE_LOCK_FORCED + .getMessage(unclosedOperations)); + } + /* + * Set the flag so shared locks know that the ref count has been reset, + * no need to update the ref count on unlock. + * If one shared lock did not decrement the ref count before this reset, it should not + * decrement the ref count when it is released. + */ + resetRefCounter(); + LOG.warn("{} readers/writers fail to complete/abort before we stop/restart the RocksDB", + unclosedOperations); + } + } + + /** + * When the exclusive lock is forced (after a timeout), we have to reset the ref count to zero + * and throw away the updates from the concurrent readers. In other words, those readers should + * not update the ref count when they release the lock. One possible sequence of events + * goes as below: + * + * 1. Reader checks the flag. + * 2. Reader increments refCount. + * 3. 
Reader is blocked (for a lock) or goes to sleep. + * 4. One Closer comes in, sets the flag and waits on refCount. + * 5. Closer wait times out. Closer forces the exclusive lock and resets refCount to 0. + * 6. Instead of closing the RocksDB, the exclusive lock is taken for restoring the RocksDB. + * 7. Closer finishes and resets the flag to 0. + * 8. Reader wakes up and releases the shared lock, now it should NOT decrement the ref count. + * + * We create a new ref counter and throw away the existing one. So the old readers will + * update the old counter when they release the lock, and only the new counter will be used. + */ + private void resetRefCounter() { + mRefCount = new LongAdder(); + } + + /** + * Before the process shuts down, acquire an exclusive lock on the RocksDB before closing. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. + * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * close the RocksDB and recycle all relevant resources. + * + * The STOP_SERVING status will NOT be reset, because the process will shut down soon. + * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForClosing() { + Exception e = new RuntimeException("Log trace here"); + LOG.error("Just recording the trace here", e); + // Grab the lock with no respect to concurrent operations + // Just grab the lock and close + setFlagAndBlockingWait(false); + return new RocksExclusiveLockHandle(mCheckRefCount); + } + + /** + * Before the process shuts down, acquire an exclusive lock on the RocksDB before closing. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. 
+ * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * restart/checkpoint the RocksDB and update the DB reference. + * + * The STOP_SERVING status will be reset and the RocksDB will be open for operations again. + * The version will not be bumped up, because the RocksDB contents has not changed. + * See {@link #checkAndAcquireSharedLock} for how this affects the shared lock logic. + * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForCheckpoint() { + // Grab the lock with respect to contenders + setFlagAndBlockingWait(true); + return new RocksExclusiveLockHandle(() -> { + mCheckRefCount.call(); + // There is no need to worry about overwriting another concurrent Closer action + // The only chance of concurrency is with lockForClosing() + // But lockForClosing() guarantees the master process will close immediately + mRocksDbStopServing.set(false, mRocksDbStopServing.getStamp()); + return null; + }); + } + + /** + * Before the process shuts down, acquire an exclusive lock on the RocksDB before closing. + * Note this lock only exists on the Alluxio side. A STOP_SERVING flag will be set so all + * existing readers/writers will abort asap. + * The exclusive lock ensures there are no existing concurrent r/w operations, so it is safe to + * restart/checkpoint the RocksDB and update the DB reference. + * + * The STOP_SERVING status will be reset and the RocksDB will be open for operations again. + * The version will be bumped up, because the RocksDB contents has changed. If there is one slow + * operation expecting to see the old version, that operation should abort. + * See {@link #checkAndAcquireSharedLock} for how this affects the shared lock logic. 
+ * + * @return the exclusive lock handle used to manage and close the lock + */ + public RocksExclusiveLockHandle lockForRewrite() { + // Grab the lock with respect to contenders + setFlagAndBlockingWait(true); + return new RocksExclusiveLockHandle(() -> { + mCheckRefCount.call(); + // There is no need to worry about overwriting another concurrent Closer action + // The only chance of concurrency is with lockForClosing() + // But lockForClosing() guarantees the master process will close immediately + mRocksDbStopServing.set(false, mRocksDbStopServing.getStamp() + 1); + return null; + }); + } + + /** + * Used by ongoing r/w operations to check if the operation needs to abort and yield + * to the RocksDB shutdown. + * + * @param lockedVersion The RocksDB version from the shared lock. This version is used to tell + * if a restore or clear operation has happened on the RocksDB. + */ + public void shouldAbort(int lockedVersion) { + if (mRocksDbStopServing.getReference()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_CLOSING.getMessage()); + } else if (lockedVersion < mRocksDbStopServing.getStamp()) { + throw new UnavailableRuntimeException(ExceptionMessage.ROCKS_DB_REWRITTEN.getMessage()); + } + } + + /** + * Checks whether the RocksDB is marked for exclusive access, so the operation should abort. + * @return whether the RocksDB expects to stop + */ + public boolean isServiceStopping() { + return mRocksDbStopServing.getReference(); + } + + /** + * Gets the number of shared lock on the RocksStore. 
+ * + * @return the count + */ + @VisibleForTesting + public long getSharedLockCount() { + return mRefCount.sum(); + } } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java new file mode 100644 index 000000000000..d6a3072febb8 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksBlockMetaStoreTest.java @@ -0,0 +1,275 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.metastore.rocks; + +import static alluxio.master.metastore.rocks.RocksStoreTestUtils.waitForReaders; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.master.metastore.BlockMetaStore; +import alluxio.proto.meta.Block; +import alluxio.resource.CloseableIterator; +import alluxio.util.ThreadFactoryUtils; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.Nullable; + +public class RocksBlockMetaStoreTest { + private static final int FILE_NUMBER = 400; + private static final int THREAD_NUMBER = 20; + + @Rule + public TemporaryFolder mFolder = new TemporaryFolder(); + + public String mPath; + public RocksBlockMetaStore mStore; + + private ExecutorService mThreadPool; + + @Before + public void setUp() throws Exception { + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + // Wait for a shorter period of time in test + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "1s"); + mPath = mFolder.newFolder().getAbsolutePath(); + mStore = new 
RocksBlockMetaStore(mFolder.newFolder().getAbsolutePath()); + mThreadPool = Executors.newCachedThreadPool(ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + public void tearDown() throws Exception { + mStore.close(); + mThreadPool.shutdownNow(); + mThreadPool = null; + } + + @Test + public void escapingIteratorExceptionInNext() throws Exception { + prepareBlocks(FILE_NUMBER); + + FlakyRocksBlockStore delegateStore = new FlakyRocksBlockStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(false, true)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void escapingIteratorExceptionInHasNext() throws Exception { + prepareBlocks(FILE_NUMBER); + + FlakyRocksBlockStore delegateStore = new FlakyRocksBlockStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(true, false)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void longRunningIterAndCheckpoint() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
+ Configuration.set(PropertyKey.TEST_MODE, false); + prepareBlocks(FILE_NUMBER); + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should complete because checkpointing does not invalidate the readers + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + } + + @Test + public void longRunningIterAndRestore() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
+ Configuration.set(PropertyKey.TEST_MODE, false); + prepareBlocks(FILE_NUMBER); + + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(THREAD_NUMBER, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(0, completed); + long aborted = results.stream().filter(n -> n == 10).count(); + assertEquals(THREAD_NUMBER, aborted); + } + + public static class FlakyRocksBlockStore extends RocksInodeStore { + private final RocksBlockMetaStore mDelegate; + + public FlakyRocksBlockStore(String baseDir, RocksBlockMetaStore delegate) { + super(baseDir); + mDelegate = delegate; + } + + public CloseableIterator getCloseableIterator( + boolean hasNextIsFlaky, boolean nextIsFlaky) { + CloseableIterator iter = mDelegate.getCloseableIterator(); + + // This iterator is flaky + return new CloseableIterator(iter) { + private int mCounter = 0; + + @Override + public 
void closeResource() { + iter.closeResource(); + } + + @Override + public boolean hasNext() { + if (mCounter == 5 && hasNextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.hasNext(); + } + + @Override + public BlockMetaStore.Block next() { + mCounter++; + if (mCounter == 5 && nextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.next(); + } + }; + } + } + + private void prepareBlocks(int blockCount) throws Exception { + for (int i = 1; i < blockCount + 1; i++) { + mStore.putBlock(i, Block.BlockMeta.newBuilder().setLength(100).build()); + } + } + + private List> submitIterJob(int threadCount, + ArrayBlockingQueue errors, ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < threadCount; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator iter = mStore.getCloseableIterator()) { + while (iter.hasNext()) { + if (listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java index 0e4a4561bb07..d4f4b619e7ad 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksInodeStoreTest.java @@ -11,45 +11,732 @@ package alluxio.master.metastore.rocks; +import 
static alluxio.master.metastore.rocks.RocksStoreTestUtils.waitForReaders; import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.meta.InodeView; +import alluxio.master.file.meta.MutableInode; import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.journal.checkpoint.CheckpointInputStream; import alluxio.master.metastore.InodeStore.WriteBatch; +import alluxio.master.metastore.ReadOption; +import alluxio.resource.CloseableIterator; +import alluxio.util.ThreadFactoryUtils; +import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; +import javax.annotation.Nullable; public class RocksInodeStoreTest { + private static final int FILE_NUMBER = 400; + private static final int THREAD_NUMBER = 20; + @Rule public TemporaryFolder mFolder = new TemporaryFolder(); + public String mPath; + public RocksInodeStore mStore; + + private ExecutorService mThreadPool; + + // Functional wrappers of RocksDB r/w actions + private QuadFunction, ArrayBlockingQueue, + 
CountDownLatch, CountDownLatch, List>> mCreateAddReaders = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + return submitAddInodeJob( + errors, results, readerRunningLatch, writerCompletedLatch); + }; + private QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> mCreateGetReaders = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + return submitGetInodeJob( + errors, results, readerRunningLatch, writerCompletedLatch); + }; + private QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> mCreateListReadersAbort = + (errors, results, readerRunningLatch, writerCompletedLatch) -> { + // Do not wait for the writer latch, writer will run concurrent to the list actions + return submitListingJob( + errors, results, readerRunningLatch, null); + }; + + @Before + public void setUp() throws Exception { + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + // Wait for a shorter period of time in test + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "1s"); + mPath = mFolder.newFolder().getAbsolutePath(); + mStore = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); + mThreadPool = Executors.newCachedThreadPool( + ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + public void tearDown() throws Exception { + mStore.close(); + mThreadPool.shutdownNow(); + mThreadPool = null; + } + @Test public void batchWrite() throws IOException { - RocksInodeStore store = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); - WriteBatch batch = store.createWriteBatch(); + WriteBatch batch = mStore.createWriteBatch(); for (int i = 1; i < 20; i++) { batch.writeInode( MutableInodeDirectory.create(i, 0, "dir" + i, CreateDirectoryContext.defaults())); } batch.commit(); for (int i = 1; i < 20; i++) { - assertEquals("dir" + i, store.get(i).get().getName()); + 
assertEquals("dir" + i, mStore.get(i).get().getName()); } } @Test public void toStringEntries() throws IOException { - RocksInodeStore store = new RocksInodeStore(mFolder.newFolder().getAbsolutePath()); - assertEquals("", store.toStringEntries()); + assertEquals("", mStore.toStringEntries()); + + mStore.writeInode(MutableInodeDirectory.create( + 1, 0, "dir", CreateDirectoryContext.defaults())); + assertEquals("dir", mStore.get(1).get().getName()); + assertThat(mStore.toStringEntries(), containsString("name=dir")); + } + + @Test + public void concurrentListAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateListReadersAbort); + } + + @Test + public void concurrentListAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + // Results are all empty after the clear + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentListAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = 
results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentListAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateListReadersAbort, (errors, results) -> { + assertTrue(errors.size() <= THREAD_NUMBER); + // Depending on the thread execution order, sometimes the reader threads + // may run to finish before the writer thread picks up the signal and flag + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed + errors.size()); + return null; + }, (errors, results) -> { + // Results are all empty after the clear + assertEquals(0, errors.size()); + long seeEmpty = results.stream().filter(n -> n == 0).count(); + assertEquals(THREAD_NUMBER, seeEmpty); + return null; + }); + } + + @Test + public void concurrentGetAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateGetReaders); + } + + @Test + public void concurrentGetAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + // If one inode does not exist, result will be Optional.empty + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentGetAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + 
return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentGetAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateGetReaders, (errors, results) -> { + // The closer will finish and the new Get operations are unaffected + // However, Get after the RocksDB is cleared will get empty results + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndClose() throws Exception { + testConcurrentReaderAndClose(mCreateAddReaders); + } + + @Test + public void concurrentAddAndRestore() throws Exception { + testConcurrentReaderAndRestore(mCreateAddReaders, (errors, results) -> { + // After the restore finishes, new add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndCheckpoint() throws Exception { + testConcurrentReaderAndCheckpoint(mCreateAddReaders, (errors, results) -> { + // After the clear finishes, add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { 
+ assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + @Test + public void concurrentAddAndClear() throws Exception { + testConcurrentReaderAndClear(mCreateAddReaders, (errors, results) -> { + // After the clear finishes, add operations can go on unaffected + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }, (errors, results) -> { + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == THREAD_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + return null; + }); + } + + private List> submitListingJob( + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator iter = mStore.getChildIds(0L)) { + while (iter.hasNext()) { + if (listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } + + private List> submitIterJob(int threadCount, + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < threadCount; k++) { + futures.add(mThreadPool.submit(() -> { + int listedCount = 0; + try (CloseableIterator iter = 
mStore.getCloseableIterator()) { + while (iter.hasNext()) { + if (listedCount == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + iter.next(); + listedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(listedCount); + } + return null; + })); + } + return futures; + } + + @Test + public void escapingIteratorExceptionInNext() throws Exception { + prepareFiles(FILE_NUMBER); + + FlakyRocksInodeStore delegateStore = new FlakyRocksInodeStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(false, true)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void escapingIteratorExceptionInHasNext() throws Exception { + prepareFiles(FILE_NUMBER); + + FlakyRocksInodeStore delegateStore = new FlakyRocksInodeStore(mPath, mStore); + AtomicReference exception = new AtomicReference<>(null); + try (CloseableIterator brokenIter = + delegateStore.getCloseableIterator(true, false)) { + while (brokenIter.hasNext()) { + brokenIter.next(); + } + } catch (Exception e) { + exception.set(e); + } + assertNotNull(exception.get()); + + // Even if the iter is flaky, the lock and ref count are managed correctly + // A close action will look at the ref count and err if there is a lock leak + assertEquals(0, mStore.getRocksStore().getSharedLockCount()); + mStore.close(); + } + + @Test + public void longRunningIterAndRestore() throws 
Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + prepareFiles(FILE_NUMBER); + + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should abort because the RocksDB contents have changed + assertEquals(THREAD_NUMBER, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(0, completed); + long aborted = results.stream().filter(n -> n == 10).count(); + assertEquals(THREAD_NUMBER, aborted); + } + + @Test + public void longRunningIterAndCheckpoint() throws Exception { + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
+ Configuration.set(PropertyKey.TEST_MODE, false); + prepareFiles(FILE_NUMBER); + + // Create a bunch of long running iterators on the InodeStore + CountDownLatch readerLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch restoreLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + submitIterJob(THREAD_NUMBER, errors, results, readerLatch, restoreLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + + // Verify that the iterators can still run + restoreLatch.countDown(); + waitForReaders(futures); + + // All iterators should complete because checkpointing does not invalidate the readers + assertEquals(0, errors.size()); + long completed = results.stream().filter(n -> n == FILE_NUMBER).count(); + assertEquals(THREAD_NUMBER, completed); + } + + public static class FlakyRocksInodeStore extends RocksInodeStore { + private final RocksInodeStore mDelegate; + + public FlakyRocksInodeStore(String baseDir, RocksInodeStore delegate) { + super(baseDir); + mDelegate = delegate; + } + + public CloseableIterator getCloseableIterator( + boolean hasNextIsFlaky, boolean nextIsFlaky) { + CloseableIterator iter = mDelegate.getCloseableIterator(); + + // This iterator is flaky + return new CloseableIterator(iter) { + private int mCounter = 0; + + @Override + public void closeResource() { + iter.closeResource(); + } + + @Override + public boolean hasNext() { + if (mCounter == 5 && hasNextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.hasNext(); + } + + @Override + public 
InodeView next() { + mCounter++; + if (mCounter == 5 && nextIsFlaky) { + throw new RuntimeException("Unexpected exception in iterator"); + } + return iter.next(); + } + }; + } + } + + private List> submitGetInodeJob( + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + final int iterNum = k; + futures.add(mThreadPool.submit(() -> { + int finishedCount = 0; + try { + for (int x = 0; x < THREAD_NUMBER; x++) { + long targetInodeId = iterNum * THREAD_NUMBER + x; + Optional> dir = mStore.getMutable(targetInodeId, ReadOption.defaults()); + finishedCount++; + if (x == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + writerCompletedLatch.await(); + } + } + } + } catch (Exception e) { + e.printStackTrace(); + errors.add(e); + } finally { + results.add(finishedCount); + } + return null; + })); + } + return futures; + } + + private List> submitAddInodeJob( + ArrayBlockingQueue errors, + ArrayBlockingQueue results, + @Nullable CountDownLatch readersRunningLatch, + @Nullable CountDownLatch writerCompletedLatch) { + List> futures = new ArrayList<>(); + for (int k = 0; k < THREAD_NUMBER; k++) { + final int iterNum = k; + futures.add(mThreadPool.submit(() -> { + int finishedCount = 0; + try { + for (int x = 0; x < THREAD_NUMBER; x++) { + long targetInodeId = iterNum * THREAD_NUMBER + x; + MutableInodeDirectory dir = + MutableInodeDirectory.create(targetInodeId, 0, "dir" + targetInodeId, + CreateDirectoryContext.defaults()); + mStore.addChild(0L, dir); + if (x == 10 && readersRunningLatch != null) { + readersRunningLatch.countDown(); + if (writerCompletedLatch != null) { + // Pretend the reader is blocked and will wake up after the writer is done + 
writerCompletedLatch.await(); + } + } + finishedCount++; + } + } catch (Exception e) { + errors.add(e); + } finally { + results.add(finishedCount); + } + return null; + })); + } + return futures; + } + + private void testConcurrentReaderAndClose( + QuadFunction, ArrayBlockingQueue, CountDownLatch, + CountDownLatch, List>> reader) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Await for the threads to be running in the middle, then trigger the closer event + readerRunningLatch.await(); + mStore.close(); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + // Reaching here means close() was successfully, which implies ref count reached zero + assertTrue(errors.size() <= THREAD_NUMBER); + } + + private void testConcurrentReaderAndCheckpoint( + QuadFunction, ArrayBlockingQueue, CountDownLatch, + CountDownLatch, List>> reader, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinish, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerRunningLatch.await(); + File checkpointFile = File.createTempFile("checkpoint-file", ""); + 
try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + assertTrue(Files.size(checkpointFile.toPath()) > 0); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void testConcurrentReaderAndRestore( + QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> reader, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinish, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + // Prepare a checkpoint file + File checkpointFile = File.createTempFile("checkpoint-for-recovery", ""); + try (BufferedOutputStream out = + new BufferedOutputStream(new FileOutputStream(checkpointFile))) { + mStore.writeToCheckpoint(out); + } + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerRunningLatch.await(); + try (CheckpointInputStream in = new CheckpointInputStream( + (new DataInputStream(new FileInputStream(checkpointFile))))) { + mStore.restoreFromCheckpoint(in); + } + writerCompletedLatch.countDown(); + 
waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void testConcurrentReaderAndClear( + QuadFunction, ArrayBlockingQueue, + CountDownLatch, CountDownLatch, List>> reader, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinish, + BiFunction, ArrayBlockingQueue, + Void> stateCheckAfterReadersFinishAgain + ) throws Exception { + prepareFiles(FILE_NUMBER); + + CountDownLatch readerRunningLatch = new CountDownLatch(THREAD_NUMBER); + CountDownLatch writerCompletedLatch = new CountDownLatch(1); + ArrayBlockingQueue errors = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue results = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futures = + reader.apply(errors, results, readerRunningLatch, writerCompletedLatch); + + // Await for the 20 threads to be iterating in the middle, then trigger the shutdown event + readerRunningLatch.await(); + mStore.clear(); + writerCompletedLatch.countDown(); + + waitForReaders(futures); + stateCheckAfterReadersFinish.apply(errors, results); + + // Verify that the RocksDB can still serve + ArrayBlockingQueue errorsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + ArrayBlockingQueue resultsAgain = new ArrayBlockingQueue<>(THREAD_NUMBER); + List> futuresAgain = reader.apply(errorsAgain, resultsAgain, null, null); + waitForReaders(futuresAgain); + stateCheckAfterReadersFinishAgain.apply(errorsAgain, resultsAgain); + } + + private void prepareFiles(int fileCount) throws Exception { + for (int i = 1; i < fileCount + 1; i++) { + MutableInodeDirectory dir = MutableInodeDirectory.create(i, 0, "dir" + i, + 
CreateDirectoryContext.defaults()); + mStore.addChild(0, dir); + mStore.writeInode(dir); + } + } - store.writeInode(MutableInodeDirectory.create(1, 0, "dir", CreateDirectoryContext.defaults())); - assertEquals("dir", store.get(1).get().getName()); - assertThat(store.toStringEntries(), containsString("name=dir")); + @FunctionalInterface + interface QuadFunction { + R apply(A a, B b, C c, D d); } } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java index e89cf874e7a2..3ce80ed00581 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTest.java @@ -12,10 +12,21 @@ package alluxio.master.metastore.rocks; import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.master.journal.checkpoint.CheckpointInputStream; +import alluxio.util.ThreadFactoryUtils; import com.google.common.primitives.Longs; +import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -26,60 +37,484 @@ import org.rocksdb.DBOptions; import org.rocksdb.HashLinkedListMemTableConfig; import org.rocksdb.RocksDB; +import org.rocksdb.RocksObject; import org.rocksdb.WriteOptions; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.concurrent.CountDownLatch; +import 
java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicReference; public class RocksStoreTest { @Rule public TemporaryFolder mFolder = new TemporaryFolder(); - @Test - public void backupRestore() throws Exception { + private RocksStore mStore; + List mToClose; + AtomicReference mTestColumn; + String mDbDir; + String mBackupsDir; + List mColumnDescriptors; + ExecutorService mThreadPool; + + @Before + public void setup() throws Exception { + Configuration.set(PropertyKey.MASTER_METASTORE_ROCKS_EXCLUSIVE_LOCK_TIMEOUT, "500ms"); + Configuration.set(PropertyKey.TEST_MODE, true); + + mToClose = new ArrayList<>(); ColumnFamilyOptions cfOpts = new ColumnFamilyOptions() .setMemTableConfig(new HashLinkedListMemTableConfig()) .setCompressionType(CompressionType.NO_COMPRESSION) .useFixedLengthPrefixExtractor(Longs.BYTES); // We always search using the initial long key + mToClose.add(cfOpts); - List columnDescriptors = + mColumnDescriptors = Arrays.asList(new ColumnFamilyDescriptor("test".getBytes(), cfOpts)); - String dbDir = mFolder.newFolder("rocks").getAbsolutePath(); - String backupsDir = mFolder.newFolder("rocks-backups").getAbsolutePath(); - AtomicReference testColumn = new AtomicReference<>(); + mDbDir = mFolder.newFolder("rocks").getAbsolutePath(); + mBackupsDir = mFolder.newFolder("rocks-backups").getAbsolutePath(); + mTestColumn = new AtomicReference<>(); DBOptions dbOpts = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true) .setAllowConcurrentMemtableWrite(false); - RocksStore store = - new RocksStore("test", dbDir, backupsDir, dbOpts, columnDescriptors, - Arrays.asList(testColumn)); + mToClose.add(dbOpts); + + mStore = new RocksStore("test", mDbDir, mBackupsDir, dbOpts, mColumnDescriptors, + Arrays.asList(mTestColumn)); + + mThreadPool = Executors.newCachedThreadPool( + ThreadFactoryUtils.build("test-executor-%d", true)); + } + + @After + 
public void tearDown() throws Exception { + try (RocksExclusiveLockHandle lock = mStore.lockForClosing()) { + mStore.close(); + } + + Collections.reverse(mToClose); + mToClose.forEach(RocksObject::close); + + mThreadPool.shutdownNow(); + } + + @Test + public void backupRestore() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - RocksDB db = store.getDb(); + RocksDB db; int count = 10; - for (int i = 0; i < count; i++) { - db.put(testColumn.get(), new WriteOptions().setDisableWAL(true), ("a" + i).getBytes(), - "b".getBytes()); + try (RocksSharedLockHandle lock = mStore.checkAndAcquireSharedLock()) { + db = mStore.getDb(); + for (int i = 0; i < count; i++) { + db.put(mTestColumn.get(), new WriteOptions().setDisableWAL(true), ("a" + i).getBytes(), + "b".getBytes()); + } + } + try (RocksExclusiveLockHandle lock = mStore.lockForCheckpoint()) { + mStore.writeToCheckpoint(baos); + } + try (RocksExclusiveLockHandle lock = mStore.lockForClosing()) { + mStore.close(); } - store.writeToCheckpoint(baos); - store.close(); - String newBbDir = mFolder.newFolder("rocks-new").getAbsolutePath(); - dbOpts = new DBOptions().setCreateIfMissing(true) + String newDbDir = mFolder.newFolder("rocks-new").getAbsolutePath(); + DBOptions dbOpts = new DBOptions().setCreateIfMissing(true) .setCreateMissingColumnFamilies(true) .setAllowConcurrentMemtableWrite(false); - store = - new RocksStore("test-new", newBbDir, backupsDir, dbOpts, columnDescriptors, - Arrays.asList(testColumn)); - store.restoreFromCheckpoint( - new CheckpointInputStream(new ByteArrayInputStream(baos.toByteArray()))); - db = store.getDb(); - for (int i = 0; i < count; i++) { - assertArrayEquals("b".getBytes(), db.get(testColumn.get(), ("a" + i).getBytes())); + mToClose.add(dbOpts); + mStore = + new RocksStore("test-new", newDbDir, mBackupsDir, dbOpts, mColumnDescriptors, + Arrays.asList(mTestColumn)); + try (RocksExclusiveLockHandle lock = mStore.lockForRewrite()) { + mStore.restoreFromCheckpoint( 
+ new CheckpointInputStream(new ByteArrayInputStream(baos.toByteArray()))); + } + try (RocksSharedLockHandle lock = mStore.checkAndAcquireSharedLock()) { + db = mStore.getDb(); + for (int i = 0; i < count; i++) { + assertArrayEquals("b".getBytes(), db.get(mTestColumn.get(), ("a" + i).getBytes())); + } + } + } + + @Test + public void sharedLockRefCount() { + List readLocks = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + assertEquals(i, mStore.getSharedLockCount()); + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + readLocks.add(lockHandle); + } + assertEquals(20, mStore.getSharedLockCount()); + + for (int i = 0; i < 20; i++) { + assertEquals(20 - i, mStore.getSharedLockCount()); + readLocks.get(i).close(); } - store.close(); - cfOpts.close(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void exclusiveLockOnClosing() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForClosing(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is NOT reset after the lock is released, because the service will exit + assertTrue(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockOnCheckpoint() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = 
assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is reset after the lock is released, because the service will restore + assertFalse(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockOnRewrite() { + RocksExclusiveLockHandle exclusiveLock = mStore.lockForRewrite(); + + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.checkAndAcquireSharedLock(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + Exception f = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(0); + }); + assertTrue(f.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + // The flag is reset after the lock is released, because the service will restore + assertFalse(mStore.isServiceStopping()); + } + + @Test + public void exclusiveLockForcedAndReleasedAfterSharedLock() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is 
released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Let the reader finish before the exclusive lock is released + readerCloseLatch.countDown(); + f.get(); + // That should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + exclusiveLock.close(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void exclusiveLockForcedAndReleasedBeforeSharedLock() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer 
comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // The exclusive lock releases before the reader even wakes up + exclusiveLock.close(); + // Let the reader finish + readerCloseLatch.countDown(); + f.get(); + // The ref count is not messed up + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void forcingExclusiveLockInTestWillErr() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + assertEquals(1, mStore.getSharedLockCount()); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + System.out.println("Able to unlock read lock now"); + // After a long time, this lock is released after the exclusive lock has been forced + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // In test mode, forcing the exclusive lock will result in an exception + // This will help us detect issues with the ref count + assertThrows(RuntimeException.class, () -> { + RocksExclusiveLockHandle 
exclusiveLock = mStore.lockForCheckpoint(); + }); + // Let the reader finish + readerCloseLatch.countDown(); + f.get(); + // Even if the exclusive lock attempt failed, the ref count will be correct + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void readerCanContinueAfterCheckpoint() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + // While this reader is sleeping, one checkpoint is completed in the background + // This check should pass without throwing an exception + // And that means the reader can continue doing what it was doing + mStore.shouldAbort(lockHandle.getLockVersion()); + + System.out.println("Able to continue reading"); + // After finishing its work, this lock is released + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release has passed due but should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // is forced. 
+ Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForCheckpoint(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Now the checkpointing was done, while the reader is still asleep + exclusiveLock.close(); + // Let the reader wake up and continue + readerCloseLatch.countDown(); + f.get(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void readerCanNotContinueAfterRestore() throws Exception { + // One reader gets the shared lock and does not release for a long time + CountDownLatch readerCloseLatch = new CountDownLatch(1); + CountDownLatch writerStartLatch = new CountDownLatch(1); + Future f = mThreadPool.submit(() -> { + RocksSharedLockHandle lockHandle = mStore.checkAndAcquireSharedLock(); + System.out.println("Read lock grabbed"); + writerStartLatch.countDown(); + try { + readerCloseLatch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + // While this reader is sleeping, one restore action is completed in the background + // This check should throw an exception because the RocksDB contents have changed + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + mStore.shouldAbort(lockHandle.getLockVersion()); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_REWRITTEN.getMessage())); + + System.out.println("Not able to continue reading"); + // After finishing its work, this lock is released + lockHandle.close(); + System.out.println("Read lock released"); + // The lock release has passed due but should not mess up the ref count + assertEquals(0, mStore.getSharedLockCount()); + return null; + }); + + // One closer comes in and eventually will grab the lock after wait + writerStartLatch.await(); + // Manually set this flag, otherwise an exception will be thrown when the exclusive lock + // 
is forced. + Configuration.set(PropertyKey.TEST_MODE, false); + RocksExclusiveLockHandle exclusiveLock = mStore.lockForRewrite(); + // After some wait, the closer will force the lock and reset the ref count + // And the ref count will be reset on that force + assertEquals(0, mStore.getSharedLockCount()); + // Now the checkpointing was done, while the reader is still asleep + exclusiveLock.close(); + // Let the reader wake up and continue + readerCloseLatch.countDown(); + f.get(); + assertEquals(0, mStore.getSharedLockCount()); + } + + @Test + public void checkpointThenClose() { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Before the checkpoint finishes, an attempt comes in to close + // This should succeed + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + checkpointLock.close(); + closeLock.close(); + } + + @Test + public void rewriteThenClose() { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Before the checkpoint finishes, an attempt comes in to close + // This should succeed + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + rewriteLock.close(); + closeLock.close(); + } + + @Test + public void closeThenCheckpoint() { + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Closing takes higher priority and a checkpoint attempt will fail + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + }); + 
assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + closeLock.close(); + } + + @Test + public void closeThenRewrite() { + RocksExclusiveLockHandle closeLock = mStore.lockForClosing(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Closing takes higher priority and a rewrite attempt will fail + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + closeLock.close(); + } + + @Test + public void checkpointThenRewrite() { + RocksExclusiveLockHandle checkpointLock = mStore.lockForCheckpoint(); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + // Rewrite/Checkpoint will yield to exclusive lock + Exception e = assertThrows(UnavailableRuntimeException.class, () -> { + RocksExclusiveLockHandle rewriteLock = mStore.lockForRewrite(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.ROCKS_DB_CLOSING.getMessage())); + assertEquals(0, mStore.getSharedLockCount()); + assertTrue(mStore.isServiceStopping()); + + checkpointLock.close(); } } diff --git a/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java new file mode 100644 index 000000000000..b3f004372d74 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/rocks/RocksStoreTestUtils.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore.rocks; + +import static org.junit.Assert.fail; + +import java.util.List; +import java.util.concurrent.Future; + +public class RocksStoreTestUtils { + public static void waitForReaders(List> futures) { + futures.stream().forEach(f -> { + try { + f.get(); + } catch (Exception e) { + fail("Met uncaught exception from iteration"); + } + }); + } +} diff --git a/microbench/src/main/java/alluxio/inode/RocksBenchBase.java b/microbench/src/main/java/alluxio/inode/RocksBenchBase.java index 0e08225419ad..81328252b53a 100644 --- a/microbench/src/main/java/alluxio/inode/RocksBenchBase.java +++ b/microbench/src/main/java/alluxio/inode/RocksBenchBase.java @@ -44,8 +44,10 @@ public class RocksBenchBase { static final String NO_SER_NO_ALLOC_READ = "noSerNoAllocRead"; private final RocksInodeStore mRocksInodeStore; + // RocksDB resources managed by the RocksInodeStore, no need to close manually private final RocksDB mDB; private final AtomicReference mInodesColumn; + // Created and managed in this class private final WriteOptions mDisableWAL; RocksBenchBase(String confType) throws IOException { @@ -73,6 +75,7 @@ static MutableInode genInode(boolean mIsDirectory) { void after() { mRocksInodeStore.clear(); mRocksInodeStore.close(); + mDisableWAL.close(); } long getInodeReadId(long total, long nxt, long min, int threadCount, int threadId) { From cbf543aa47c5fb8ddde709c241e6faf65ae5e0cc Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Tue, 18 Apr 2023 22:11:28 -0700 Subject: [PATCH 236/334] [DOCFIX] Update generated tables with docGen pr-link: 
Alluxio/alluxio#17283 change-id: cid-1814ff8004cffff4a3a09a12a996d423eead3927 --- docs/_data/table/common-configuration.csv | 3 +- docs/_data/table/en/common-configuration.yml | 4 +-- docs/_data/table/en/master-configuration.yml | 10 +++--- docs/_data/table/en/master-metrics.yml | 32 +++++++++++++++++++- docs/_data/table/master-configuration.csv | 7 +++-- docs/_data/table/master-metrics.csv | 15 +++++++++ 6 files changed, 58 insertions(+), 13 deletions(-) diff --git a/docs/_data/table/common-configuration.csv b/docs/_data/table/common-configuration.csv index c867283e17fa..4cefe874f052 100644 --- a/docs/_data/table/common-configuration.csv +++ b/docs/_data/table/common-configuration.csv @@ -83,7 +83,7 @@ alluxio.network.host.resolution.timeout,"5sec" alluxio.network.ip.address.used,"false" alluxio.proxy.audit.logging.enabled,"false" alluxio.proxy.s3.bucket.naming.restrictions.enabled,"false" -alluxio.proxy.s3.bucketpathcache.timeout,"1min" +alluxio.proxy.s3.bucketpathcache.timeout,"0min" alluxio.proxy.s3.complete.multipart.upload.keepalive.enabled,"false" alluxio.proxy.s3.complete.multipart.upload.keepalive.time.interval,"30sec" alluxio.proxy.s3.complete.multipart.upload.min.part.size,"5MB" @@ -118,7 +118,6 @@ alluxio.site.conf.dir,"${alluxio.conf.dir}/,${user.home}/.alluxio/,/etc/alluxio/ alluxio.site.conf.rocks.block.file,"" alluxio.site.conf.rocks.inode.file,"" alluxio.standalone.fuse.jvm.monitor.enabled,"false" -alluxio.standby.master.grpc.enabled,"false" alluxio.standby.master.metrics.sink.enabled,"false" alluxio.standby.master.web.enabled,"false" alluxio.table.catalog.path,"/catalog" diff --git a/docs/_data/table/en/common-configuration.yml b/docs/_data/table/en/common-configuration.yml index adc535d25647..201a3df495a5 100644 --- a/docs/_data/table/en/common-configuration.yml +++ b/docs/_data/table/en/common-configuration.yml @@ -167,7 +167,7 @@ alluxio.proxy.audit.logging.enabled: alluxio.proxy.s3.bucket.naming.restrictions.enabled: 'Toggles whether or not 
the Alluxio S3 API will enforce AWS S3 bucket naming restrictions. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html.' alluxio.proxy.s3.bucketpathcache.timeout: - 'Expire bucket path statistics in cache for this time period. Set 0min to disable the cache.' + 'Expire bucket path statistics in cache for this time period. Set 0min to disable the cache. If enabling the cache, be careful that Alluxio S3 API will behave differently from AWS S3 API if bucket path cache entries become stale.' alluxio.proxy.s3.complete.multipart.upload.keepalive.enabled: 'Whether or not to enabled sending whitespace characters as a keepalive message during CompleteMultipartUpload. Enabling this will cause any errors to be silently ignored. However, the errors will appear in the Proxy logs.' alluxio.proxy.s3.complete.multipart.upload.keepalive.time.interval: @@ -236,8 +236,6 @@ alluxio.site.conf.rocks.inode.file: 'Path of file containing RocksDB inode store configuration. A template configuration cab be found at ${alluxio.conf.dir}/rocks-inode.ini.template. See https://github.com/facebook/rocksdb/blob/main/examples/rocksdb_option_file_example.ini for more information on RocksDB configuration files. If unset then a default configuration will be used.' alluxio.standalone.fuse.jvm.monitor.enabled: 'Whether to enable start JVM monitor thread on the standalone fuse process. This will start a thread to detect JVM-wide pauses induced by GC or other reasons.' 
-alluxio.standby.master.grpc.enabled: - 'Whether a standby master runs a grpc server' alluxio.standby.master.metrics.sink.enabled: 'Whether a standby master runs the metric sink' alluxio.standby.master.web.enabled: diff --git a/docs/_data/table/en/master-configuration.yml b/docs/_data/table/en/master-configuration.yml index 0a70499defc9..ea9fffca137f 100644 --- a/docs/_data/table/en/master-configuration.yml +++ b/docs/_data/table/en/master-configuration.yml @@ -72,6 +72,10 @@ alluxio.master.embedded.journal.retry.cache.expiry.time: 'The time for embedded journal server retry cache to expire. Setting a bigger value allows embedded journal server to cache the responses for a longer time in case of journal writer retries, but will take up more memory in master.' alluxio.master.embedded.journal.snapshot.replication.chunk.size: 'The stream chunk size used by masters to replicate snapshots.' +alluxio.master.embedded.journal.snapshot.replication.compression.level: + 'The zip compression level of sending a snapshot from one master to another. Only applicable when alluxio.master.embedded.journal.snapshot.replication.compression.type is not NO_COMPRESSION. The zip format defines ten levels of compression, ranging from 0 (no compression, but very fast) to 9 (best compression, but slow). Or -1 for the system default compression level.' +alluxio.master.embedded.journal.snapshot.replication.compression.type: + 'The type of compression to use when transferring a snapshot from one master to another. Options are NO_COMPRESSION, GZIP, TAR_GZIP' alluxio.master.embedded.journal.transport.max.inbound.message.size: 'The maximum size of a message that can be sent to the embedded journal server node.' alluxio.master.embedded.journal.transport.request.timeout.ms: @@ -126,8 +130,6 @@ alluxio.master.journal.init.from.backup: 'A uri for a backup to initialize the journal from. 
When the master becomes primary, if it sees that its journal is freshly formatted, it will restore its state from the backup. When running multiple masters, this property must be configured on all masters since it isn''t known during startup which master will become the first primary.' alluxio.master.journal.local.log.compaction: 'Whether to employ a quorum level log compaction policy or a local (individual) log compaction policy.' -alluxio.master.journal.log.concurrency.max: - 'Max concurrency for notifyTermIndexUpdated method, be sure it''s enough' alluxio.master.journal.log.size.bytes.max: 'If a log file is bigger than this value, it will rotate to next file.' alluxio.master.journal.request.data.timeout: @@ -244,8 +246,8 @@ alluxio.master.metastore.rocks.block.meta.cache.size: 'The capacity in bytes of the RocksDB block metadata table LRU cache. If unset, the RocksDB default will be used. See https://github.com/facebook/rocksdb/wiki/Block-Cache' alluxio.master.metastore.rocks.block.meta.index: 'The index type to be used in the RocksDB block metadata table. If unset, the RocksDB default will be used. See https://github.com/facebook/rocksdb/wiki/Index-Block-Format' -alluxio.master.metastore.rocks.checkpoint.compression.level: - 'The zip compression level of checkpointing rocksdb, the zip format defines ten levels of compression, ranging from 0 (no compression, but very fast) to 9 (best compression, but slow). Or -1 for the system default compression level.' +alluxio.master.metastore.rocks.checkpoint.compression.type: + 'The compression algorithm that RocksDB uses internally. One of {NO_COMPRESSION SNAPPY_COMPRESSION ZLIB_COMPRESSION BZLIB2_COMPRESSION LZ4_COMPRESSION LZ4HC_COMPRESSION XPRESS_COMPRESSION ZSTD_COMPRESSION DISABLE_COMPRESSION_OPTION}' alluxio.master.metastore.rocks.edge.block.index: 'The block index type to be used in the RocksDB inode edge table. If unset, the RocksDB default will be used. 
See https://rocksdb.org/blog/2018/08/23/data-block-hash-index.html' alluxio.master.metastore.rocks.edge.bloom.filter: diff --git a/docs/_data/table/en/master-metrics.yml b/docs/_data/table/en/master-metrics.yml index fed9d6810da1..f468d9db954c 100644 --- a/docs/_data/table/en/master-metrics.yml +++ b/docs/_data/table/en/master-metrics.yml @@ -48,8 +48,32 @@ Master.EdgeCacheSize: 'Total number of edges (inode metadata) cached. The edge cache is responsible for managing the mapping from (parentId, childName) to childId.' Master.EdgeLockPoolSize: 'The size of master edge lock pool' +Master.EmbeddedJournalLastSnapshotDownloadDiskSize: + 'Describes the size on disk of the snapshot downloaded from other masters in the cluster the previous time the download occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotDownloadDurationMs: + 'Describes the amount of time taken to download journal snapshots from other masters in the cluster the previous time the download occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotDownloadSize: + 'Describes the size of the snapshot downloaded from other masters in the cluster the previous time the download occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotDurationMs: + 'Describes the amount of time taken to generate the last local journal snapshots on this master. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotEntriesCount: + 'Describes the number of entries in the last local journal snapshots on this master. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotReplayDurationMs: + 'Represents the time the last restore from checkpoint operation took in milliseconds.' +Master.EmbeddedJournalLastSnapshotReplayEntriesCount: + 'Represents the number of entries applied by the last restore from checkpoint operation.' 
+Master.EmbeddedJournalLastSnapshotUploadDiskSize: + 'Describes the size on disk of the snapshot uploaded to other masters in the cluster the previous time the upload occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotUploadDurationMs: + 'Describes the amount of time taken to upload journal snapshots to another master in the cluster the previous time the upload occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalLastSnapshotUploadSize: + 'Describes the size of the snapshot uploaded to other masters in the cluster the previous time the upload occurred. Only valid when using the embedded journal.' +Master.EmbeddedJournalSnapshotDownloadDiskHistogram: + 'Describes the size on disk of the snapshot downloaded from another master in the cluster. Only valid when using the embedded journal. Long running average.' Master.EmbeddedJournalSnapshotDownloadGenerate: - 'Describes the amount of time taken to download journal snapshots from other masters in the cluster. Only valid when using the embedded journal. Use this metric to determine if there are potential communication bottlenecks between Alluxio masters.' + 'Describes the amount of time taken to download journal snapshots from other masters in the cluster. Only valid when using the embedded journal. Long running average.' +Master.EmbeddedJournalSnapshotDownloadHistogram: + 'Describes the size of the snapshot downloaded from another master in the cluster. Only valid when using the embedded journal. Long running average.' Master.EmbeddedJournalSnapshotGenerateTimer: 'Describes the amount of time taken to generate local journal snapshots on this master. Only valid when using the embedded journal. Use this metric to measure the performance of Alluxio''s snapshot generation.' 
Master.EmbeddedJournalSnapshotInstallTimer: @@ -58,6 +82,12 @@ Master.EmbeddedJournalSnapshotLastIndex: 'Represents the latest journal index that was recorded by this master in the most recent local snapshot or from a snapshot downloaded from another master in the cluster. Only valid when using the embedded journal.' Master.EmbeddedJournalSnapshotReplayTimer: 'Describes the amount of time taken to replay a journal snapshot onto the master''s state machine. Only valid only when using the embedded journal. Use this metric to determine the performance of Alluxio when replaying journal snapshot file. Higher numbers may indicate a slow disk or CPU contention' +Master.EmbeddedJournalSnapshotUploadDiskHistogram: + 'Describes the size on disk of the snapshot uploaded to another master in the cluster. Only valid when using the embedded journal. Long running average.' +Master.EmbeddedJournalSnapshotUploadHistogram: + 'Describes the size of the snapshot uploaded to another master in the cluster. Only valid when using the embedded journal. Long running average.' +Master.EmbeddedJournalSnapshotUploadTimer: + 'Describes the amount of time taken to upload journal snapshots to another master in the cluster. Only valid when using the embedded journal. 
long running average' Master.FileBlockInfosGot: 'Total number of succeed GetFileBlockInfo operations' Master.FileInfosGot: diff --git a/docs/_data/table/master-configuration.csv b/docs/_data/table/master-configuration.csv index c0f0c81f8648..68bfd7c6697e 100644 --- a/docs/_data/table/master-configuration.csv +++ b/docs/_data/table/master-configuration.csv @@ -36,6 +36,8 @@ alluxio.master.embedded.journal.raft.client.request.timeout,"60sec" alluxio.master.embedded.journal.ratis.config,"" alluxio.master.embedded.journal.retry.cache.expiry.time,"60s" alluxio.master.embedded.journal.snapshot.replication.chunk.size,"4MB" +alluxio.master.embedded.journal.snapshot.replication.compression.level,"1" +alluxio.master.embedded.journal.snapshot.replication.compression.type,"NO_COMPRESSION" alluxio.master.embedded.journal.transport.max.inbound.message.size,"100MB" alluxio.master.embedded.journal.transport.request.timeout.ms,"5sec" alluxio.master.embedded.journal.unsafe.flush.enabled,"false" @@ -63,10 +65,9 @@ alluxio.master.journal.gc.period,"2min" alluxio.master.journal.gc.threshold,"5min" alluxio.master.journal.init.from.backup,"" alluxio.master.journal.local.log.compaction,"true" -alluxio.master.journal.log.concurrency.max,"256" alluxio.master.journal.log.size.bytes.max,"10MB" alluxio.master.journal.request.data.timeout,"20000" -alluxio.master.journal.request.info.timeout,"20000" +alluxio.master.journal.request.info.timeout,"10000" alluxio.master.journal.retry.interval,"1sec" alluxio.master.journal.space.monitor.interval,"10min" alluxio.master.journal.space.monitor.percent.free.threshold,"10" @@ -122,7 +123,7 @@ alluxio.master.metastore.rocks.block.meta.block.index,"" alluxio.master.metastore.rocks.block.meta.bloom.filter,"false" alluxio.master.metastore.rocks.block.meta.cache.size,"" alluxio.master.metastore.rocks.block.meta.index,"" -alluxio.master.metastore.rocks.checkpoint.compression.level,"1" +alluxio.master.metastore.rocks.checkpoint.compression.type,"LZ4_COMPRESSION" 
alluxio.master.metastore.rocks.edge.block.index,"" alluxio.master.metastore.rocks.edge.bloom.filter,"false" alluxio.master.metastore.rocks.edge.cache.size,"" diff --git a/docs/_data/table/master-metrics.csv b/docs/_data/table/master-metrics.csv index 4374cce3f9ca..593fc709f3ff 100644 --- a/docs/_data/table/master-metrics.csv +++ b/docs/_data/table/master-metrics.csv @@ -24,11 +24,26 @@ Master.EdgeCacheLoadTimes,GAUGE Master.EdgeCacheMisses,GAUGE Master.EdgeCacheSize,GAUGE Master.EdgeLockPoolSize,GAUGE +Master.EmbeddedJournalLastSnapshotDownloadDiskSize,GAUGE +Master.EmbeddedJournalLastSnapshotDownloadDurationMs,GAUGE +Master.EmbeddedJournalLastSnapshotDownloadSize,GAUGE +Master.EmbeddedJournalLastSnapshotDurationMs,GAUGE +Master.EmbeddedJournalLastSnapshotEntriesCount,GAUGE +Master.EmbeddedJournalLastSnapshotReplayDurationMs,GAUGE +Master.EmbeddedJournalLastSnapshotReplayEntriesCount,GAUGE +Master.EmbeddedJournalLastSnapshotUploadDiskSize,GAUGE +Master.EmbeddedJournalLastSnapshotUploadDurationMs,GAUGE +Master.EmbeddedJournalLastSnapshotUploadSize,GAUGE +Master.EmbeddedJournalSnapshotDownloadDiskHistogram,HISTOGRAM Master.EmbeddedJournalSnapshotDownloadGenerate,TIMER +Master.EmbeddedJournalSnapshotDownloadHistogram,HISTOGRAM Master.EmbeddedJournalSnapshotGenerateTimer,TIMER Master.EmbeddedJournalSnapshotInstallTimer,TIMER Master.EmbeddedJournalSnapshotLastIndex,GAUGE Master.EmbeddedJournalSnapshotReplayTimer,TIMER +Master.EmbeddedJournalSnapshotUploadDiskHistogram,HISTOGRAM +Master.EmbeddedJournalSnapshotUploadHistogram,HISTOGRAM +Master.EmbeddedJournalSnapshotUploadTimer,TIMER Master.FileBlockInfosGot,COUNTER Master.FileInfosGot,COUNTER Master.FileSize,GAUGE From 208cb967b9b611d537a76c085197bd0f84a9e67f Mon Sep 17 00:00:00 2001 From: bingzheng Date: Wed, 19 Apr 2023 14:26:30 +0800 Subject: [PATCH 237/334] Fix the aggregate result problem in UfsIOBench ### What changes are proposed in this pull request? Fix the aggregate result problem in UfsIOBench. 
### Why are the changes needed? The UfsIOBench result summary calculates the total duration by summing each point's result, and computes the average speed based on total duration, actually the average can't represent the system performance. For example: An alluxio cluster with 1 job master and 3 job workers, Run the UfsIOBench with 16 threads: ``` ./bin/alluxio runUfsIOTest --path hdfs://hdfsCluster/tmp/ufsIoBanch --io-size 2G --threads 16 --cluster ``` WeChatWorkScreenshot_14f71ae8-abe3-410f-bc45-5d31e45e1691 the mTotalDurationSeconds is 16 * 3 *(points duration), but actually the 16 points read and write concurrently and finished almost at the same time. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including No pr-link: Alluxio/alluxio#17063 change-id: cid-ff987441a28e7171eef9fdc765ca93aba3096887 --- .../alluxio/stress/worker/IOTaskSummary.java | 26 +++++++++++++------ .../stress/worker/IOTaskSummaryTest.java | 9 +++++-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/stress/common/src/main/java/alluxio/stress/worker/IOTaskSummary.java b/stress/common/src/main/java/alluxio/stress/worker/IOTaskSummary.java index 1d02f7d9a4d2..27e6cd22a676 100644 --- a/stress/common/src/main/java/alluxio/stress/worker/IOTaskSummary.java +++ b/stress/common/src/main/java/alluxio/stress/worker/IOTaskSummary.java @@ -26,6 +26,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -149,9 +150,15 @@ public void setWriteSpeedStat(SpeedStat stat) { public static class SpeedStat implements JsonSerializable { public double mTotalDurationSeconds; public long mTotalSizeBytes; + // Max speed among all nodes public double mMaxSpeedMbps; + // Min speed among all nodes public double mMinSpeedMbps; + // Average speed of all nodes public double mAvgSpeedMbps; + // Cluster-wide throughput + public double 
mClusterAvgSpeedMbps; + // Standard deviation of speed reported by each node public double mStdDev; /** @@ -162,9 +169,10 @@ public SpeedStat() {} @Override public String toString() { return String.format("{totalDuration=%ss, totalSize=%s, maxSpeed=%sMB/s, " - + "minSpeed=%sMB/s, " + "avgSpeed=%sMB/s, stdDev=%s}", + + "minSpeed=%sMB/s, " + "avgSpeed=%sMB/s, clusterAvgSpeed=%sMB/s, " + + "stdDev=%s}", mTotalDurationSeconds, FormatUtils.getSizeFromBytes(mTotalSizeBytes), - mMaxSpeedMbps, mMinSpeedMbps, mAvgSpeedMbps, mStdDev); + mMaxSpeedMbps, mMinSpeedMbps, mAvgSpeedMbps, mClusterAvgSpeedMbps, mStdDev); } } @@ -177,31 +185,33 @@ private static SpeedStat calculateStat(List points) { return result; } - double totalDuration = 0.0; long totalSize = 0L; double[] speeds = new double[points.size()]; double maxSpeed = 0.0; double minSpeed = Double.MAX_VALUE; int i = 0; for (IOTaskResult.Point p : points) { - totalDuration += p.mDurationSeconds; + result.mTotalDurationSeconds = Math.max(p.mDurationSeconds, result.mTotalDurationSeconds); totalSize += p.mDataSizeBytes; double speed = p.mDataSizeBytes / (p.mDurationSeconds * 1024 * 1024); // convert B/s to MB/s maxSpeed = Math.max(maxSpeed, speed); minSpeed = Math.min(minSpeed, speed); speeds[i++] = speed; } - double avgSpeed = totalSize / (totalDuration * 1024 * 1024); // convert B/s to MB/s + // calculate the average speed for each point + double avgPointSpeed = Arrays.stream(speeds).sum() / points.size(); + double avgClusterSpeed = totalSize + / (result.mTotalDurationSeconds * 1024 * 1024); // convert B/s to MB/s double var = 0; for (double s : speeds) { - var += (s - avgSpeed) * (s - avgSpeed); + var += (s - avgPointSpeed) * (s - avgPointSpeed); } - result.mTotalDurationSeconds = totalDuration; result.mTotalSizeBytes = totalSize; result.mMaxSpeedMbps = maxSpeed; result.mMinSpeedMbps = Double.compare(minSpeed, Double.MAX_VALUE) == 0 ? 
0.0 : minSpeed; - result.mAvgSpeedMbps = avgSpeed; + result.mAvgSpeedMbps = avgPointSpeed; + result.mClusterAvgSpeedMbps = avgClusterSpeed; result.mStdDev = Math.sqrt(var); return result; diff --git a/stress/common/src/test/java/alluxio/stress/worker/IOTaskSummaryTest.java b/stress/common/src/test/java/alluxio/stress/worker/IOTaskSummaryTest.java index e8cd17549ada..dc936bd63430 100644 --- a/stress/common/src/test/java/alluxio/stress/worker/IOTaskSummaryTest.java +++ b/stress/common/src/test/java/alluxio/stress/worker/IOTaskSummaryTest.java @@ -73,16 +73,19 @@ public void statCalculation() { IOTaskResult result = new IOTaskResult(); double[] durations = new double[]{1.0, 1.5, 2.0, 1.11}; long[] sizes = new long[]{200_000_000, 300_000_000, 500_000_000, 800_000_000}; + double[] speeds = new double[sizes.length]; for (int i = 0; i < sizes.length; i++) { result.addPoint(new IOTaskResult.Point(IOTaskResult.IOMode.READ, durations[i], sizes[i])); result.addPoint(new IOTaskResult.Point(IOTaskResult.IOMode.WRITE, durations[i], sizes[i])); + speeds[i] = sizes[i] / (durations[i] * 1024 * 1024); } IOTaskSummary summary = new IOTaskSummary(result); IOTaskSummary.SpeedStat readStat = summary.getReadSpeedStat(); - double totalDuration = Arrays.stream(durations).sum(); + double totalDuration = Arrays.stream(durations).max().orElse(0L); long totalSize = Arrays.stream(sizes).sum(); - double avgSpeed = totalSize / (totalDuration * 1024 * 1024); + double avgSpeed = Arrays.stream(speeds).sum() / speeds.length; + double clusterAvgSpeed = totalSize / (2.0 * 1024 * 1024); double maxSpeed = 800_000_000 / (1.11 * 1024 * 1024); double minSpeed = 200_000_000 / (1.0 * 1024 * 1024); assertEquals(totalDuration, readStat.mTotalDurationSeconds, 1e-5); @@ -90,6 +93,7 @@ public void statCalculation() { assertEquals(avgSpeed, readStat.mAvgSpeedMbps, 1e-5); assertEquals(maxSpeed, readStat.mMaxSpeedMbps, 1e-5); assertEquals(minSpeed, readStat.mMinSpeedMbps, 1e-5); + assertEquals(clusterAvgSpeed, 
readStat.mClusterAvgSpeedMbps, 1e-5); IOTaskSummary.SpeedStat writeStat = summary.getWriteSpeedStat(); assertEquals(totalDuration, writeStat.mTotalDurationSeconds, 1e-5); @@ -97,6 +101,7 @@ public void statCalculation() { assertEquals(avgSpeed, writeStat.mAvgSpeedMbps, 1e-5); assertEquals(maxSpeed, writeStat.mMaxSpeedMbps, 1e-5); assertEquals(minSpeed, writeStat.mMinSpeedMbps, 1e-5); + assertEquals(clusterAvgSpeed, writeStat.mClusterAvgSpeedMbps, 1e-5); } private void checkEquality(IOTaskSummary.SpeedStat a, IOTaskSummary.SpeedStat b) { From bf60aefc247f57708fd39dce1c440515712a54c1 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Wed, 19 Apr 2023 17:32:31 +0800 Subject: [PATCH 238/334] [SMALLFIX] Remove unintended trace ### What changes are proposed in this pull request? Remove one log entry which was accidentally brought in from testing pr-link: Alluxio/alluxio#17285 change-id: cid-2ede46a3fe5b387e75c2273ef7f727106124b734 --- .../master/metastore/rocks/RocksStore.java | 2 - .../WorkerAllMasterRegistrationTest.java | 64 ++++++++++++++----- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java index 766e9e7516fc..b7778af0dd40 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/rocks/RocksStore.java @@ -663,8 +663,6 @@ private void resetRefCounter() { * @return the exclusive lock handle used to manage and close the lock */ public RocksExclusiveLockHandle lockForClosing() { - Exception e = new RuntimeException("Log trace here"); - LOG.error("Just recording the trace here", e); // Grab the lock with no respect to concurrent operations // Just grab the lock and close setFlagAndBlockingWait(false); diff --git a/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java 
b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java index 81b57ad31314..1726b43bb105 100644 --- a/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java +++ b/tests/src/test/java/alluxio/server/worker/WorkerAllMasterRegistrationTest.java @@ -22,6 +22,7 @@ import alluxio.client.file.FileOutStream; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.UnavailableRuntimeException; import alluxio.exception.status.UnavailableException; import alluxio.master.MultiMasterEmbeddedJournalLocalAlluxioCluster; import alluxio.master.block.BlockMaster; @@ -70,7 +71,6 @@ public void before() throws Exception { mNumMasters, mNumWorkers, PortCoordination.WORKER_ALL_MASTER_REGISTRATION); mCluster.initConfiguration( IntegrationTestUtils.getTestName(getClass().getSimpleName(), mTestName.getMethodName())); - Configuration.set(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, 5); Configuration.set(PropertyKey.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX, 100); Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); Configuration.set(PropertyKey.STANDBY_MASTER_GRPC_ENABLED, true); @@ -80,7 +80,6 @@ public void before() throws Exception { Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_WRITE_TIMEOUT, "10sec"); Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "3s"); Configuration.set(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "6s"); - Configuration.set(PropertyKey.WORKER_REGISTER_TO_ALL_MASTERS, true); mCluster.start(); @@ -126,16 +125,28 @@ public void happyPath() throws Exception { // New blocks are added by committing journals CommonUtils.waitFor("wait for blocks being committed to all masters", () -> - mBlockMasters.stream().allMatch( - it -> it.getBlockMetaStore().getLocations(blockId).size() == 1), - mDefaultWaitForOptions); + mBlockMasters.stream().allMatch(it -> { + try { + return 
it.getBlockMetaStore().getLocations(blockId).size() == 1; + } catch (UnavailableRuntimeException e) { + // The RocksDB is unavailable due to events like checkpoint + // Just retry + } + return false; + }), mDefaultWaitForOptions); // Removed blocks are reported by worker-master heartbeats mWorker.removeBlock(new Random().nextLong(), blockId); CommonUtils.waitFor("wait for blocks being removed to all masters", () -> - mBlockMasters.stream().allMatch( - it -> it.getBlockMetaStore().getLocations(blockId).size() == 0), - mDefaultWaitForOptions); + mBlockMasters.stream().allMatch(it -> { + try { + return it.getBlockMetaStore().getLocations(blockId).size() == 0; + } catch (UnavailableRuntimeException e) { + // The RocksDB is unavailable due to events like checkpoint + // Just retry + } + return false; + }), mDefaultWaitForOptions); assertTrue(mWorker.getBlockSyncMasterGroup().isRegisteredToAllMasters()); @@ -180,9 +191,15 @@ public void workerHeartbeatFail() throws Exception { // so even if the heartbeat fails, standby are still in sync with primary. 
CommonUtils.waitFor("wait for blocks being committed to all masters by heartbeats", () -> - mBlockMasters.stream().allMatch( - it -> it.getBlockMetaStore().getLocations(blockId).size() == 1), - mDefaultWaitForOptions); + mBlockMasters.stream().allMatch(it -> { + try { + return it.getBlockMetaStore().getLocations(blockId).size() == 1; + } catch (UnavailableRuntimeException e) { + // The RocksDB is unavailable due to events like checkpoint + // Just retry + } + return false; + }), mDefaultWaitForOptions); // Remove a block mWorker.removeBlock(new Random().nextLong(), blockId); @@ -192,9 +209,15 @@ public void workerHeartbeatFail() throws Exception { getBlockSyncOperators().values().forEach(TestSpecificMasterBlockSync::restoreHeartbeat); CommonUtils.waitFor("wait for blocks being removed on all masters by heartbeats", () -> - mBlockMasters.stream().allMatch( - it -> it.getBlockMetaStore().getLocations(blockId).size() == 0), - mDefaultWaitForOptions); + mBlockMasters.stream().allMatch(it -> { + try { + return it.getBlockMetaStore().getLocations(blockId).size() == 0; + } catch (UnavailableRuntimeException e) { + // The RocksDB is unavailable due to events like checkpoint + // Just retry + } + return false; + }), mDefaultWaitForOptions); // Make sure registration only happen once to each master assertTrue(getBlockSyncOperators().values().stream() @@ -347,10 +370,17 @@ public void heartbeatFallsBackToRegister() throws Exception { // so the block meta store should not contain any block location blockIdsToRemove.add(testFileBlockId); CommonUtils.waitFor("wait for blocks propagated to masters by heartbeats", - () -> mBlockMasters.stream() - .allMatch(it -> + () -> mBlockMasters.stream().allMatch(it -> blockIdsToRemove.stream() - .allMatch(blockId -> it.getBlockMetaStore().getLocations(blockId).size() == 0) + .allMatch(blockId -> { + try { + return it.getBlockMetaStore().getLocations(blockId).size() == 0; + } catch (UnavailableRuntimeException e) { + // The RocksDB is 
unavailable due to events like checkpoint + // Just retry + } + return false; + }) ), mDefaultWaitForOptions); } From e0a33e5e6defa374437f8f3a5f5d5bc60ef47687 Mon Sep 17 00:00:00 2001 From: David Zhu Date: Wed, 19 Apr 2023 10:29:14 -0700 Subject: [PATCH 239/334] Use SlidingTimeWindow Moving Average for meter computation ### What changes are proposed in this pull request? The exponential moving average used for meter can produce inaccurate rate when computing transfer rate. The reason is that we call mark(number of bytes) when a transfer is finished to indicate how many bytes have been transferred, but that is inaccurate in calculating an average rate which is really what we want in a Bytes/Sec calculation. ### Why are the changes needed? SlidingTimeWindow produces a moving average that is more correct especially when a short window is used https://www.javadoc.io/static/io.dropwizard.metrics/metrics-core/4.1.1/com/codahale/metrics/SlidingTimeWindowMovingAverages.html ### Does this PR introduce any user facing changes? 
no pr-link: Alluxio/alluxio#17218 change-id: cid-d21fdf406c78f7ddcad011c117b77ef6b1bf46c7 --- .../src/main/java/alluxio/metrics/MetricsSystem.java | 11 +++++++---- .../main/java/alluxio/worker/block/UfsIOManager.java | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java index c43099df7a74..7d141c82bb93 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricsSystem.java +++ b/core/common/src/main/java/alluxio/metrics/MetricsSystem.java @@ -27,6 +27,7 @@ import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.SlidingTimeWindowMovingAverages; import com.codahale.metrics.Timer; import com.codahale.metrics.UniformReservoir; import com.codahale.metrics.jvm.CachedThreadStatesGaugeSet; @@ -93,6 +94,7 @@ public final class MetricsSystem { CommonUtils.memoize(() -> constructSourceName()); private static final Map EXECUTOR_SERVICES = new ConcurrentHashMap<>(); + private static final int SECONDS_IN_A_MINUTE = 60; /** * An enum of supported instance type. @@ -593,7 +595,8 @@ public static Counter counterWithTags(String name, boolean shouldReport, String. * @return a meter object with the qualified metric name */ public static Meter meter(String name) { - return METRIC_REGISTRY.meter(getMetricName(name)); + return METRIC_REGISTRY.meter(getMetricName(name), + () -> new Meter(new SlidingTimeWindowMovingAverages())); } /** @@ -798,7 +801,7 @@ private static synchronized List reportMetrics(InstanceType // that a value marked. For clients, especially short-life clients, // the minute rates will be zero for their whole life. // That's why all throughput meters are not aggregated at cluster level. 
- rpcMetrics.add(Metric.from(entry.getKey(), meter.getOneMinuteRate(), + rpcMetrics.add(Metric.from(entry.getKey(), meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE, MetricType.METER).toProto()); } else if (metric instanceof Timer) { Timer timer = (Timer) metric; @@ -883,7 +886,7 @@ private static Metric getAlluxioMetricFromCodahaleMetric(String name, return Metric.from(name, counter.getCount(), MetricType.COUNTER); } else if (metric instanceof Meter) { Meter meter = (Meter) metric; - return Metric.from(name, meter.getOneMinuteRate(), MetricType.METER); + return Metric.from(name, meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE, MetricType.METER); } else if (metric instanceof Timer) { Timer timer = (Timer) metric; return Metric.from(name, timer.getCount(), MetricType.TIMER); @@ -915,7 +918,7 @@ public static Map allMetrics() { .setDoubleValue(((Counter) metric).getCount()); } else if (metric instanceof Meter) { valueBuilder.setMetricType(MetricType.METER) - .setDoubleValue(((Meter) metric).getOneMinuteRate()); + .setDoubleValue(((Meter) metric).getOneMinuteRate() / SECONDS_IN_A_MINUTE); } else if (metric instanceof Timer) { valueBuilder.setMetricType(MetricType.TIMER) .setDoubleValue(((Timer) metric).getCount()); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java index 48f7773d127d..9fac931c5cae 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/UfsIOManager.java @@ -125,7 +125,7 @@ private void schedule() { */ @VisibleForTesting public double getUsedThroughput(Meter meter) { - return meter.getOneMinuteRate(); + return meter.getOneMinuteRate() / 60; } /** From 94429edc786d997ad7feefe0365bca7d74b03e3e Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Wed, 19 Apr 2023 11:42:51 -0700 Subject: [PATCH 240/334] Skip unrelated property keys from generating in docGen pt2 pr-link: 
Alluxio/alluxio#17284 change-id: cid-b6800a0be10f9ef7b24b28c099a7431e52b98c6d --- .../java/alluxio/cli/docgen/ConfigurationDocGenerator.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java b/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java index 0413657134f6..931c807b28d1 100644 --- a/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java +++ b/shell/src/main/java/alluxio/cli/docgen/ConfigurationDocGenerator.java @@ -187,8 +187,12 @@ public static void writeYMLFile(Collection defaultKeys, S fileWriter = fileWriterMap.get("security"); } else if (pKey.startsWith("alluxio.integration.")) { fileWriter = fileWriterMap.get("cluster-management"); - } else { + } else if (pKey.startsWith("alluxio.") || pKey.startsWith("fs.") + || pKey.startsWith("s3a.")) { fileWriter = fileWriterMap.get("common"); + } else { + // skip configuration properties unrelated to Alluxio + continue; } fileWriter.append(StringEscapeUtils.escapeHtml4(keyValueStr)); } From 589251236b1b4888182a9191044805aaee5401c0 Mon Sep 17 00:00:00 2001 From: Yaolong Liu Date: Thu, 20 Apr 2023 03:16:10 +0800 Subject: [PATCH 241/334] Integrate ratis-shell to the alluxio tarball Fix https://github.com/Alluxio/alluxio/issues/16943 pr-link: Alluxio/alluxio#16980 change-id: cid-ee63db7b1a00a46034bd9e44a35241a45ed98689 --- .../cmd/generate-tarball.go | 2 ++ integration/tools/ratis-shell/README.md | 5 ++++ .../tools/ratis-shell/install-ratis-shell.sh | 29 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 integration/tools/ratis-shell/README.md create mode 100755 integration/tools/ratis-shell/install-ratis-shell.sh diff --git a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go index 18785199b855..52a82e93088f 100644 --- 
a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go +++ b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go @@ -228,6 +228,8 @@ func addAdditionalFiles(srcPath, dstPath string, hadoopVersion version, version "integration/metrics/otel-agent-config-worker.yaml", "integration/metrics/otel-collector-config.yaml", "integration/metrics/prometheus.yaml", + "integration/tools/ratis-shell/install-ratis-shell.sh", + "integration/tools/ratis-shell/README.md", ) } diff --git a/integration/tools/ratis-shell/README.md b/integration/tools/ratis-shell/README.md new file mode 100644 index 000000000000..47df3ac7e96d --- /dev/null +++ b/integration/tools/ratis-shell/README.md @@ -0,0 +1,5 @@ +### Apache Ratis Shell +`ratis-shell` can manage the ha of alluxio master, you can quickly install it by +running `install-ratis-shell.sh`. + +For more doc, please refer to [RATIS SHELL DOC](https://github.com/apache/ratis/blob/master/ratis-docs/src/site/markdown/cli.md) \ No newline at end of file diff --git a/integration/tools/ratis-shell/install-ratis-shell.sh b/integration/tools/ratis-shell/install-ratis-shell.sh new file mode 100755 index 000000000000..0eb0470b1047 --- /dev/null +++ b/integration/tools/ratis-shell/install-ratis-shell.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 +# (the "License"). You may not use this work except in compliance with the License, which is +# available at www.apache.org/licenses/LICENSE-2.0 +# +# This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied, as more fully set forth in the License. +# +# See the NOTICE file distributed with this work for information regarding copyright ownership. 
+# + +# +# This script is run from inside the Docker container +# +set -ex + +RATIS_SHELL_DIR=$(cd "$( dirname "$( readlink "$0" || echo "$0" )" )"; pwd) +# ratis-shell version +VERSION=$1 +if [ -z "$VERSION" ]; then + VERSION=2.4.1 +fi + +wget -P "$RATIS_SHELL_DIR" "https://dlcdn.apache.org/ratis/$VERSION/apache-ratis-$VERSION-bin.tar.gz" +mkdir ratis-cli +tar -zxvf apache-ratis-$VERSION-bin.tar.gz -C $RATIS_SHELL_DIR/ratis-cli --strip-component 1 +chmod 755 ratis-cli/bin/ratis +rm apache-ratis-$VERSION-bin.tar.gz From a578ca29a32eb6898973c97f415936aeb47ae293 Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Wed, 19 Apr 2023 13:39:21 -0700 Subject: [PATCH 242/334] [DOCFIX] Add newline at EOF One file is missing the newline at end of file. This PR fixes it. pr-link: Alluxio/alluxio#17294 change-id: cid-db2278515ec362341914ad7e39ec30c3d4f72ef2 --- integration/tools/ratis-shell/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/tools/ratis-shell/README.md b/integration/tools/ratis-shell/README.md index 47df3ac7e96d..4e4111181208 100644 --- a/integration/tools/ratis-shell/README.md +++ b/integration/tools/ratis-shell/README.md @@ -2,4 +2,4 @@ `ratis-shell` can manage the ha of alluxio master, you can quickly install it by running `install-ratis-shell.sh`. -For more doc, please refer to [RATIS SHELL DOC](https://github.com/apache/ratis/blob/master/ratis-docs/src/site/markdown/cli.md) \ No newline at end of file +For more doc, please refer to [RATIS SHELL DOC](https://github.com/apache/ratis/blob/master/ratis-docs/src/site/markdown/cli.md) From 5e358fc4ee69d7f8ae77343d4f0bbe05fae2eb01 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 20 Apr 2023 13:18:33 +0800 Subject: [PATCH 243/334] Fix FileSystemMergeJournalContext related ### What changes are proposed in this pull request? 1. 
Only force flushing journals when a locked inode path is closed for FileSystemMergeJournalContext 2. Fix the journal merger broken logic where updating the fingerprint for a directory will be ignored. ### Why are the changes needed? We used to fix the merge journal context in https://github.com/Alluxio/alluxio/pull/17071, where a regular non-merging journal context is used when listStatus() is called. However, if a listStatus triggers a metadata sync and MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS is set to true, journals will be flushed on every lockedInodePath close during the metadata sync. This behavior leads to journals being flushed too many times and impairs the metadata sync performance. Also we found a minor issue that when inode directory journals are merged, the fingerprint will be ignored. This is because inode directory journal does not have a fingerprint field. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#17251 change-id: cid-23ea3229e6781483ab582c3f4d4c9c4f61cdf634 --- .../file/FileSystemJournalEntryMerger.java | 8 +++++ .../master/file/meta/LockedInodePath.java | 11 ++++-- .../FileSystemJournalEntryMergerTest.java | 35 +++++++++++++++++++ .../master/file/meta/LockedInodePathTest.java | 3 +- 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java index 5c8df09dace5..c0910746730a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemJournalEntryMerger.java @@ -70,6 +70,14 @@ else if ( MutableInodeDirectory.fromJournalEntry(existingEntry.getInodeDirectory()); if (entry.hasUpdateInode()) { inodeDirectory.updateFromEntry(entry.getUpdateInode()); + // Update Inode directory does not contain directory fingerprint, + // so we still 
need to add the new inode journal entry to the list to keep the + // fingerprint update, + // while we still merge it with the existing inode directory on as best efforts. + if (entry.getUpdateInode().hasUfsFingerprint() + && !entry.getUpdateInode().getUfsFingerprint().equals("")) { + mJournalEntries.add(entry); + } } else if (entry.hasUpdateInodeDirectory()) { inodeDirectory.updateFromEntry(entry.getUpdateInodeDirectory()); } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java index c9aee1e81168..7c66b86a5e1f 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java @@ -20,6 +20,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.status.UnavailableException; import alluxio.master.file.meta.InodeTree.LockPattern; +import alluxio.master.journal.FileSystemMergeJournalContext; import alluxio.master.journal.JournalContext; import alluxio.master.metastore.ReadOnlyInodeStore; import alluxio.resource.AlluxioResourceLeakDetectorFactory; @@ -85,9 +86,7 @@ public class LockedInodePath implements Closeable { @Nullable private final ResourceLeakTracker mTracker; /** To determine if we should flush the journals when lock is released or scope reduced. */ - private final boolean mMergeInodeJournals = Configuration.getBoolean( - PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS - ); + private final boolean mMergeInodeJournals; /** * Keeps a reference of JournalContext and flushes it before the lock is released. 
@@ -159,6 +158,9 @@ public LockedInodePath(AlluxioURI uri, ReadOnlyInodeStore inodeStore, mLockList = new SimpleInodeLockList(inodeLockManager, mUseTryLock); mTracker = DETECTOR.track(this); mJournalContext = journalContext; + mMergeInodeJournals = Configuration.getBoolean( + PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS + ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** @@ -184,6 +186,9 @@ private LockedInodePath(AlluxioURI uri, LockedInodePath path, String[] pathCompo // So the new created LockInodePath instance must be on the same thread with // the original one and hence they will use the same JournalContext. mJournalContext = path.mJournalContext; + mMergeInodeJournals = Configuration.getBoolean( + PropertyKey.MASTER_FILE_SYSTEM_MERGE_INODE_JOURNALS + ) && mJournalContext instanceof FileSystemMergeJournalContext; } /** diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java index d2c1e77bdb80..746404a26b6e 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemJournalEntryMergerTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import alluxio.AlluxioURI; import alluxio.master.block.BlockId; @@ -112,4 +113,38 @@ public void testFileSystemJournalEntryMerger() { merger.clear(); assertEquals(0, merger.getMergedJournalEntries().size()); } + + @Test + public void testMergeDirectoryFingerprint() { + AlluxioURI uri = new AlluxioURI("/dir/test1"); + + FileSystemJournalEntryMerger merger = new FileSystemJournalEntryMerger(); + + merger.add(Journal.JournalEntry.newBuilder().setInodeDirectory( + File.InodeDirectoryEntry.newBuilder().setId(1).setParentId(0) + 
.setPersistenceState(PersistenceState.PERSISTED.name()) + .setName("test_dir").setPath("test_dir").build()).build()); + + merger.add(Journal.JournalEntry.newBuilder().setUpdateInodeDirectory( + File.UpdateInodeDirectoryEntry.newBuilder().setId(1) + .setDirectChildrenLoaded(true).build()).build()); + + merger.add(Journal.JournalEntry.newBuilder().setUpdateInode( + File.UpdateInodeEntry.newBuilder().setId(1) + .setName("test_dir_updated") + .setUfsFingerprint("fingerprint") + .build()).build()); + + List entries = merger.getMergedJournalEntries(); + Journal.JournalEntry entry = entries.get(0); + assertNotNull(entry.getInodeDirectory()); + assertEquals(1, entry.getInodeDirectory().getId()); + assertEquals("test_dir_updated", entry.getInodeDirectory().getName()); + assertEquals("test_dir", entry.getInodeDirectory().getPath()); + assertTrue(entry.getInodeDirectory().getDirectChildrenLoaded()); + + Journal.JournalEntry entry2 = entries.get(1); + assertNotNull(entry2.getUpdateInode()); + assertEquals("fingerprint", entry2.getUpdateInode().getUfsFingerprint()); + } } diff --git a/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java b/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java index b071694b5824..d56512ee55d5 100644 --- a/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/meta/LockedInodePathTest.java @@ -24,6 +24,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.status.UnavailableException; import alluxio.master.file.meta.InodeTree.LockPattern; +import alluxio.master.journal.FileSystemMergeJournalContext; import alluxio.master.journal.JournalContext; import alluxio.master.journal.NoopJournalContext; @@ -598,7 +599,7 @@ public void lockFinalEdgeWriteAlreadyLocked() throws Exception { @Test public void testFlushJournal() throws InvalidPathException, UnavailableException { AtomicInteger 
journalFlushCount = new AtomicInteger(); - JournalContext journalContext = mock(JournalContext.class); + JournalContext journalContext = mock(FileSystemMergeJournalContext.class); Mockito.doAnswer( (mock) -> { journalFlushCount.getAndIncrement(); From e5fd12eae89e5e7bbf6b21020a5d5a8ef9baf732 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 21 Apr 2023 16:36:19 +0800 Subject: [PATCH 244/334] Print journal system information to help diagnose stop reason ### What changes are proposed in this pull request? When journal stop, print the reason, so user can find out why it stop. Something like journal is not formatted. ### Why are the changes needed? Show the journal stop reason pr-link: Alluxio/alluxio#17299 change-id: cid-a5938fcd3b664eb873ad5e380f4e047b038621ee --- .../main/java/alluxio/master/journal/AbstractJournalSystem.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java index 31ee1d00a96d..871e3d501642 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/AbstractJournalSystem.java @@ -49,7 +49,7 @@ public synchronized void start() { @Override public synchronized void stop() { - Preconditions.checkState(mRunning, "Journal is not running"); + Preconditions.checkState(mRunning, "Journal is not running : " + this); mAllJournalSinks.forEach(JournalSink::beforeShutdown); mRunning = false; stopInternal(); From 4ad6185ab23a7cf151e4289eb7e11b34f87f2d91 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sat, 22 Apr 2023 12:18:45 +0800 Subject: [PATCH 245/334] Do not pass sync interval by default ### What changes are proposed in this pull request? 
Before this PR, if we did not config `alluxio.user.file.metadata.sync.interval` in client side, and config it in master side while we disable the `alluxio.user.conf.cluster.default.enabled` meanwhile, the metadata sync interval will always set to `-1`. After this PR, when `alluxio.user.conf.cluster.default.enabled=false`, master side `alluxio.user.file.metadata.sync.interval` will be accepted if user didn't set it. image ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#17182 change-id: cid-b54bf51844aba97544237370748d3a2e1c74ec91 --- .../java/alluxio/util/FileSystemOptionsUtils.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java index 9480d88ca004..56520f8bfb77 100644 --- a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java +++ b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java @@ -185,12 +185,14 @@ public static FileSystemMasterCommonPOptions commonDefaults(AlluxioConfiguration public static FileSystemMasterCommonPOptions commonDefaults(AlluxioConfiguration conf, boolean withOpId) { FileSystemMasterCommonPOptions.Builder builder = FileSystemMasterCommonPOptions.newBuilder() - .setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) .setTtl(conf.getMs(PropertyKey.USER_FILE_CREATE_TTL)) .setTtlAction(conf.getEnum(PropertyKey.USER_FILE_CREATE_TTL_ACTION, TtlAction.class)); if (withOpId && conf.getBoolean(PropertyKey.USER_FILE_INCLUDE_OPERATION_ID)) { builder.setOperationId(new OperationId(UUID.randomUUID()).toFsProto()); } + if (conf.isSetByUser(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) { + builder.setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)); + } return builder.build(); } @@ -342,10 +344,12 @@ public static SetAttributePOptions 
setAttributeClientDefaults(AlluxioConfigurati // Specifically set and override *only* the metadata sync interval // Setting other attributes by default will make the server think the user is intentionally // setting the values. Most fields withinSetAttributePOptions are set by inclusion + FileSystemMasterCommonPOptions.Builder builder = FileSystemMasterCommonPOptions.newBuilder(); + if (conf.isSetByUser(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) { + builder.setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)); + } return SetAttributePOptions.newBuilder() - .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() - .setSyncIntervalMs(conf.getMs(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL)) - .build()) + .setCommonOptions(builder.build()) .build(); } From dbac084c1e1d89c1e9253da036746e2df9e1ef14 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Tue, 25 Apr 2023 10:18:26 +0800 Subject: [PATCH 246/334] Improve proxy register functionality and command Cherry-pick of existing commit. 
orig-pr: TachyonNexus/enterprise#4081 orig-commit: TachyonNexus/enterprise@ac514e4d5ab33746c100f90564ecb102c9f9c73a orig-commit-author: Jiacheng Liu pr-link: Alluxio/alluxio#17225 change-id: cid-d4d2cdbb3dacd37eced812e10c267d0e567917da --- .../alluxio/client/meta/MetaMasterClient.java | 9 ++ .../meta/RetryHandlingMetaMasterClient.java | 10 ++ .../src/main/java/alluxio/Constants.java | 2 + .../main/java/alluxio/conf/PropertyKey.java | 42 +++++ .../ServiceVersionClientServiceHandler.java | 3 + .../alluxio/heartbeat/HeartbeatContext.java | 4 + .../master/meta/DefaultMetaMaster.java | 111 +++++++++++++ .../java/alluxio/master/meta/MetaMaster.java | 17 +- .../meta/MetaMasterClientServiceHandler.java | 11 ++ .../meta/MetaMasterProxyServiceHandler.java | 52 +++++++ .../java/alluxio/master/meta/ProxyInfo.java | 120 ++++++++++++++ .../alluxio/proxy/AlluxioProxyProcess.java | 33 ++++ .../java/alluxio/proxy/ProxyMasterSync.java | 69 ++++++++ .../RetryHandlingMetaMasterProxyClient.java | 95 +++++++++++ .../src/main/proto/grpc/block_master.proto | 6 - .../src/main/proto/grpc/common.proto | 7 + .../src/main/proto/grpc/meta_master.proto | 45 ++++++ .../src/main/proto/grpc/version.proto | 1 + core/transport/src/main/proto/proto.lock | 136 ++++++++++++++-- .../cli/fsadmin/command/ReportCommand.java | 11 +- .../cli/fsadmin/report/ProxyCommand.java | 98 ++++++++++++ .../cli/fsadmin/report/ProxyCommandTest.java | 147 ++++++++++++++++++ 22 files changed, 1006 insertions(+), 23 deletions(-) create mode 100644 core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java create mode 100644 core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java create mode 100644 core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java create mode 100644 shell/src/main/java/alluxio/cli/fsadmin/report/ProxyCommand.java create mode 100644 
shell/src/test/java/alluxio/cli/fsadmin/report/ProxyCommandTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java b/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java index 02e4b7a5eca7..073048d86c5f 100644 --- a/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/meta/MetaMasterClient.java @@ -15,10 +15,12 @@ import alluxio.grpc.BackupPRequest; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; +import alluxio.grpc.ProxyStatus; import alluxio.wire.BackupStatus; import alluxio.wire.ConfigCheckReport; import java.io.IOException; +import java.util.List; import java.util.Set; import java.util.UUID; @@ -71,4 +73,11 @@ public interface MetaMasterClient extends Client { * @return the hostname of the master that did the checkpoint */ String checkpoint() throws IOException; + + /** + * Lists information of all known proxy instances from the primary master. 
+ * + * @return the list of proxy status + */ + List listProxyStatus() throws IOException; } diff --git a/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java b/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java index 63e44aa2b38c..875074b76281 100644 --- a/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/meta/RetryHandlingMetaMasterClient.java @@ -18,9 +18,11 @@ import alluxio.grpc.CheckpointPOptions; import alluxio.grpc.GetConfigReportPOptions; import alluxio.grpc.GetMasterInfoPOptions; +import alluxio.grpc.ListProxyStatusPRequest; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; import alluxio.grpc.MetaMasterClientServiceGrpc; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.ServiceType; import alluxio.master.MasterClientContext; import alluxio.wire.BackupStatus; @@ -30,6 +32,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.List; import java.util.Set; import java.util.UUID; import javax.annotation.concurrent.ThreadSafe; @@ -106,4 +109,11 @@ public String checkpoint() throws IOException { .checkpoint(CheckpointPOptions.newBuilder().build()).getMasterHostname(), RPC_LOG, "Checkpoint", ""); } + + @Override + public List listProxyStatus() throws IOException { + return retryRPC(() -> mClient.listProxyStatus( + ListProxyStatusPRequest.getDefaultInstance()).getProxyStatusesList(), + RPC_LOG, "ListProxyStatus", ""); + } } diff --git a/core/common/src/main/java/alluxio/Constants.java b/core/common/src/main/java/alluxio/Constants.java index 588675d1d662..3a7dcf5b9019 100644 --- a/core/common/src/main/java/alluxio/Constants.java +++ b/core/common/src/main/java/alluxio/Constants.java @@ -95,6 +95,7 @@ public final class Constants { public static final long META_MASTER_CONFIG_SERVICE_VERSION = 2; public static final long META_MASTER_CLIENT_SERVICE_VERSION = 2; public 
static final long META_MASTER_MASTER_SERVICE_VERSION = 1; + public static final long META_MASTER_PROXY_SERVICE_VERSION = 1; public static final long METRICS_MASTER_CLIENT_SERVICE_VERSION = 2; public static final long JOURNAL_MASTER_CLIENT_SERVICE_VERSION = 1; public static final long RAFT_JOURNAL_SERVICE_VERSION = 1; @@ -117,6 +118,7 @@ public final class Constants { // Its value is "MetaMaster" for backwards compatibility so 1.7 clients can talk to 1.8 MetaMaster public static final String META_MASTER_CONFIG_SERVICE_NAME = "MetaMaster"; public static final String META_MASTER_CLIENT_SERVICE_NAME = "MetaMaster"; + public static final String META_MASTER_PROXY_SERVICE_NAME = "MetaMasterProxy"; public static final String META_MASTER_MASTER_SERVICE_NAME = "MetaMasterMaster"; public static final String METRICS_MASTER_CLIENT_SERVICE_NAME = "MetricsMasterClient"; public static final String BLOCK_WORKER_CLIENT_SERVICE_NAME = "BlockWorkerClient"; diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index dfdab6e892e9..2f1421513512 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -3319,6 +3319,32 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.MASTER) .build(); + public static final PropertyKey MASTER_PROXY_TIMEOUT_MS = + durationBuilder(Name.MASTER_PROXY_TIMEOUT_MS) + .setAlias("alluxio.master.proxy.timeout.ms") + .setDefaultValue("5m") + .setDescription("An Alluxio Proxy instance will maintain heartbeat to the primary " + + "Alluxio Master. 
No heartbeat more than this timeout indicates a lost Proxy.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL = + durationBuilder(Name.MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL) + .setDefaultValue("1min") + .setDescription("The master will periodically check the last heartbeat time from all " + + "Proxy instances. This key specifies the frequency of the check.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey MASTER_LOST_PROXY_DELETION_TIMEOUT_MS = + durationBuilder(Name.MASTER_LOST_PROXY_DELETION_TIMEOUT_MS) + .setAlias("alluxio.master.lost.proxy.deletion.timeout.ms") + .setDefaultValue("30min") + .setDescription("If an Alluxio Proxy has been lost for more than this timeout, " + + "the master will totally forget this worker.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.MASTER) + .build(); public static final PropertyKey MASTER_RPC_PORT = intBuilder(Name.MASTER_RPC_PORT) .setAlias("alluxio.master.port") @@ -5233,6 +5259,15 @@ public String toString() { // // Proxy related properties // + public static final PropertyKey PROXY_MASTER_HEARTBEAT_INTERVAL = + durationBuilder(Name.PROXY_MASTER_HEARTBEAT_INTERVAL) + .setAlias("alluxio.proxy.master.heartbeat.interval.ms") + .setDefaultValue("10sec") + .setDescription("Proxy instances maintain a heartbeat with the primary master. 
" + + "This key specifies the heartbeat interval.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey PROXY_S3_WRITE_TYPE = enumBuilder(Name.PROXY_S3_WRITE_TYPE, WriteType.class) .setDefaultValue(WriteType.CACHE_THROUGH) @@ -7978,6 +8013,8 @@ public static final class Name { "alluxio.master.lock.pool.high.watermark"; public static final String MASTER_LOCK_POOL_CONCURRENCY_LEVEL = "alluxio.master.lock.pool.concurrency.level"; + public static final String MASTER_LOST_PROXY_DELETION_TIMEOUT_MS = + "alluxio.master.lost.proxy.deletion.timeout"; public static final String MASTER_JOURNAL_CATCHUP_PROTECT_ENABLED = "alluxio.master.journal.catchup.protect.enabled"; public static final String MASTER_JOURNAL_EXIT_ON_DEMOTION = @@ -8196,6 +8233,9 @@ public static final class Name { public static final String MASTER_PERIODIC_BLOCK_INTEGRITY_CHECK_INTERVAL = "alluxio.master.periodic.block.integrity.check.interval"; public static final String MASTER_PRINCIPAL = "alluxio.master.principal"; + public static final String MASTER_PROXY_TIMEOUT_MS = "alluxio.master.proxy.timeout"; + public static final String MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL = + "alluxio.master.proxy.check.heartbeat.timeout"; public static final String MASTER_REPLICATION_CHECK_INTERVAL_MS = "alluxio.master.replication.check.interval"; public static final String MASTER_RPC_PORT = "alluxio.master.rpc.port"; @@ -8586,6 +8626,8 @@ public static final class Name { // // Proxy related properties // + public static final String PROXY_MASTER_HEARTBEAT_INTERVAL = + "alluxio.proxy.master.heartbeat.interval"; public static final String PROXY_S3_WRITE_TYPE = "alluxio.proxy.s3.writetype"; public static final String PROXY_S3_DELETE_TYPE = "alluxio.proxy.s3.deletetype"; public static final String PROXY_S3_MULTIPART_UPLOAD_CLEANER_ENABLED = diff --git a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java 
b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index a795266c1cd4..b889f972c698 100644 --- a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -97,6 +97,9 @@ public void getServiceVersion(GetServiceVersionPRequest request, case META_MASTER_MASTER_SERVICE: serviceVersion = Constants.META_MASTER_MASTER_SERVICE_VERSION; break; + case META_MASTER_PROXY_SERVICE: + serviceVersion = Constants.META_MASTER_PROXY_SERVICE_VERSION; + break; case METRICS_MASTER_CLIENT_SERVICE: serviceVersion = Constants.METRICS_MASTER_CLIENT_SERVICE_VERSION; break; diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java index c5378bf0f141..7daf23012c6e 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java @@ -47,6 +47,7 @@ public final class HeartbeatContext { public static final String MASTER_LOST_FILES_DETECTION = "Master Lost Files Detection"; public static final String MASTER_LOST_MASTER_DETECTION = "Master Lost Master Detection"; public static final String MASTER_LOST_WORKER_DETECTION = "Master Lost Worker Detection"; + public static final String MASTER_LOST_PROXY_DETECTION = "Master Lost Proxy Detection"; public static final String MASTER_METRICS_SYNC = "Master Metrics Sync"; public static final String MASTER_METRICS_TIME_SERIES = "Master Metrics Time Series"; public static final String MASTER_ORPHANED_METRICS_CLEANER = "Master Orphaned Metrics Cleaner"; @@ -59,6 +60,7 @@ public final class HeartbeatContext { public static final String MASTER_UFS_CLEANUP = "Master Ufs Cleanup"; public static final String MASTER_UPDATE_CHECK = "Master Update Check"; public static final String META_MASTER_SYNC = "Meta Master Sync"; + public static final String 
PROXY_META_MASTER_SYNC = "Proxy MetaMaster Sync"; public static final String WORKER_BLOCK_SYNC = "Worker Block Sync"; public static final String WORKER_CLIENT = "Worker Client"; public static final String WORKER_FILESYSTEM_MASTER_SYNC = "Worker FileSystemMaster Sync"; @@ -136,6 +138,7 @@ private HeartbeatType(int value) { sTimerClasses.put(MASTER_LOST_FILES_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_LOST_MASTER_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_LOST_WORKER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(MASTER_LOST_PROXY_DETECTION, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_METRICS_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_METRICS_TIME_SERIES, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_PERSISTENCE_CHECKER, SLEEPING_TIMER_CLASS); @@ -147,6 +150,7 @@ private HeartbeatType(int value) { sTimerClasses.put(MASTER_UFS_CLEANUP, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_UPDATE_CHECK, SLEEPING_TIMER_CLASS); sTimerClasses.put(META_MASTER_SYNC, SLEEPING_TIMER_CLASS); + sTimerClasses.put(PROXY_META_MASTER_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_BLOCK_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_CLIENT, SLEEPING_TIMER_CLASS); sTimerClasses.put(WORKER_FILESYSTEM_MASTER_SYNC, SLEEPING_TIMER_CLASS); diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index b1b8fe6bb393..42822132b27c 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -28,10 +28,15 @@ import alluxio.grpc.BackupPOptions; import alluxio.grpc.BackupPRequest; import alluxio.grpc.BackupStatusPRequest; +import alluxio.grpc.BuildVersion; import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.GrpcService; import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; 
+import alluxio.grpc.NetAddress; +import alluxio.grpc.ProxyHeartbeatPOptions; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.RegisterMasterPOptions; import alluxio.grpc.Scope; import alluxio.grpc.ServiceType; @@ -78,12 +83,14 @@ import java.net.InetSocketAddress; import java.text.MessageFormat; import java.time.Clock; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executors; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -124,6 +131,11 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { private final IndexedSet mLostMasters = new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + /** Keeps track of proxies which are in communication with the primary master. */ + private final Map mProxies = new ConcurrentHashMap<>(); + /** Keeps track of proxies which are no longer in communication with the primary master. */ + private final Map mLostProxies = new ConcurrentHashMap<>(); + /** The connect address for the rpc server. */ private final InetSocketAddress mRpcConnectAddress = NetworkAddressUtils.getConnectAddress(NetworkAddressUtils.ServiceType.MASTER_RPC, @@ -272,6 +284,8 @@ public Map getServices() { new GrpcService(ServerInterceptors.intercept( new MetaMasterMasterServiceHandler(this), new ClientContextServerInjector()))); + services.put(ServiceType.META_MASTER_PROXY_SERVICE, + new GrpcService(new MetaMasterProxyServiceHandler(this))); // Add backup role services. 
services.putAll(mBackupRole.getRoleServices()); services.putAll(mJournalSystem.getJournalServices()); @@ -317,6 +331,12 @@ public void start(Boolean isPrimary) throws IOException { () -> new FixedIntervalSupplier( Configuration.getMs(PropertyKey.MASTER_LOG_CONFIG_REPORT_HEARTBEAT_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); + getExecutorService().submit(new HeartbeatThread( + HeartbeatContext.MASTER_LOST_PROXY_DETECTION, + new LostProxyDetectionHeartbeatExecutor(), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.MASTER_PROXY_CHECK_HEARTBEAT_INTERVAL)), + Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_DAILY_BACKUP_ENABLED)) { mDailyBackup = new DailyMetadataBackup(this, Executors.newSingleThreadScheduledExecutor( @@ -648,6 +668,29 @@ public void masterRegister(long masterId, RegisterMasterPOptions options) LOG.info("registerMaster(): master: {}", master); } + @Override + public void proxyHeartbeat(ProxyHeartbeatPRequest request) { + LOG.debug("Received proxy heartbeat {}", request); + ProxyHeartbeatPOptions options = request.getOptions(); + NetAddress address = options.getProxyAddress(); + mProxies.compute(address, (key, proxyInfo) -> { + if (proxyInfo == null) { + ProxyInfo info = new ProxyInfo(address); + info.setStartTimeMs(options.getStartTime()); + info.setVersion(options.getVersion().getVersion()); + info.setRevision(options.getVersion().getRevision()); + info.updateLastHeartbeatTimeMs(); + return info; + } else { + proxyInfo.setVersion(options.getVersion().getVersion()); + proxyInfo.setRevision(options.getVersion().getRevision()); + proxyInfo.updateLastHeartbeatTimeMs(); + return proxyInfo; + } + }); + mLostProxies.remove(address); + } + @Override public CheckpointName getCheckpointName() { return CheckpointName.META_MASTER; @@ -707,6 +750,30 @@ public Map updateConfiguration(Map propertiesMa return result; } + @Override + public List listProxyStatus() { + 
List result = new ArrayList<>(); + for (Map.Entry entry : mProxies.entrySet()) { + ProxyInfo info = entry.getValue(); + result.add(ProxyStatus.newBuilder().setAddress(entry.getKey()) + .setState("ACTIVE") + .setVersion(BuildVersion.newBuilder() + .setVersion(info.getVersion()).setRevision(info.getRevision()).build()) + .setStartTime(info.getStartTimeMs()) + .setLastHeartbeatTime(info.getLastHeartbeatTimeMs()).build()); + } + for (Map.Entry entry : mLostProxies.entrySet()) { + ProxyInfo info = entry.getValue(); + result.add(ProxyStatus.newBuilder().setAddress(entry.getKey()) + .setState("LOST") + .setVersion(BuildVersion.newBuilder() + .setVersion(info.getVersion()).setRevision(info.getRevision()).build()) + .setStartTime(info.getStartTimeMs()) + .setLastHeartbeatTime(info.getLastHeartbeatTimeMs()).build()); + } + return result; + } + /** * Lost master periodic check. */ @@ -741,6 +808,50 @@ public void close() { } } + /** + * Lost proxy periodic check. + */ + private final class LostProxyDetectionHeartbeatExecutor implements HeartbeatExecutor { + + /** + * Constructs a new {@link LostProxyDetectionHeartbeatExecutor}. 
+ */ + public LostProxyDetectionHeartbeatExecutor() { + } + + @Override + public void heartbeat(long timeLimitMs) { + long proxyTimeoutMs = Configuration.getMs(PropertyKey.MASTER_PROXY_TIMEOUT_MS); + long masterProxyDeleteTimeoutMs = + Configuration.getMs(PropertyKey.MASTER_LOST_PROXY_DELETION_TIMEOUT_MS); + LOG.debug("LostProxyDetection checking proxies at {}", mProxies.keySet()); + mProxies.entrySet().removeIf(entry -> { + final long lastUpdate = mClock.millis() - entry.getValue().getLastHeartbeatTimeMs(); + if (lastUpdate > proxyTimeoutMs) { + LOG.warn("Proxy {} last heartbeat time {} was more than {}ms ago", + entry.getKey(), entry.getValue().getLastHeartbeatTimeMs(), proxyTimeoutMs); + mLostProxies.put(entry.getKey(), entry.getValue()); + return true; + } + return false; + }); + mLostProxies.entrySet().removeIf(entry -> { + final long lastUpdate = mClock.millis() - entry.getValue().getLastHeartbeatTimeMs(); + if (lastUpdate > masterProxyDeleteTimeoutMs) { + LOG.warn("Proxy {} has been LOST for more than {}ms. " + + "Master will forget about this Proxy", entry.getKey(), masterProxyDeleteTimeoutMs); + return true; + } + return false; + }); + } + + @Override + public void close() { + // Nothing to clean up + } + } + /** * Periodically log the config check report. 
*/ diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java index 3dbb7e33318e..175077106312 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMaster.java @@ -17,6 +17,8 @@ import alluxio.grpc.GetConfigurationPOptions; import alluxio.grpc.MasterHeartbeatPOptions; import alluxio.grpc.MetaCommand; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyStatus; import alluxio.grpc.RegisterMasterPOptions; import alluxio.master.Master; import alluxio.master.backup.BackupOps; @@ -37,7 +39,6 @@ * The interface of meta master. */ public interface MetaMaster extends BackupOps, Master { - /** * @return the cluster ID */ @@ -188,4 +189,18 @@ void setPathConfiguration(String path, Map properties) * @return the update properties status map */ Map updateConfiguration(Map propertiesMap); + + /** + * A Proxy periodically heartbeats with the primary master. + * + * @param request the heartbeat message + */ + void proxyHeartbeat(ProxyHeartbeatPRequest request); + + /** + * Lists information of all known Proxy instances. 
+ * + * @return a list of status + */ + List listProxyStatus(); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java index 2d76019d89d4..a6e4bbefe09b 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterClientServiceHandler.java @@ -24,6 +24,8 @@ import alluxio.grpc.GetConfigReportPResponse; import alluxio.grpc.GetMasterInfoPOptions; import alluxio.grpc.GetMasterInfoPResponse; +import alluxio.grpc.ListProxyStatusPRequest; +import alluxio.grpc.ListProxyStatusPResponse; import alluxio.grpc.MasterInfo; import alluxio.grpc.MasterInfoField; import alluxio.grpc.MasterVersion; @@ -186,4 +188,13 @@ public void checkpoint(CheckpointPOptions options, () -> CheckpointPResponse.newBuilder().setMasterHostname(mMetaMaster.checkpoint()).build(), "checkpoint", "options=%s", responseObserver, options); } + + @Override + public void listProxyStatus(ListProxyStatusPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, + () -> ListProxyStatusPResponse.newBuilder() + .addAllProxyStatuses(mMetaMaster.listProxyStatus()).build(), + "listProxyStatus", "options=%s", responseObserver, request.getOptions()); + } } diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java new file mode 100644 index 000000000000..97eeebfab155 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterProxyServiceHandler.java @@ -0,0 +1,52 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.RpcUtils; +import alluxio.grpc.MetaMasterProxyServiceGrpc; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ProxyHeartbeatPResponse; + +import io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * This class is a gRPC handler for meta master RPCs invoked by an Alluxio standby master. + */ +@NotThreadSafe +public final class MetaMasterProxyServiceHandler + extends MetaMasterProxyServiceGrpc.MetaMasterProxyServiceImplBase { + private static final Logger LOG = LoggerFactory.getLogger(MetaMasterProxyServiceHandler.class); + + private final MetaMaster mMetaMaster; + + /** + * Creates a new instance of {@link MetaMasterProxyServiceHandler}. 
+ * + * @param metaMaster the Alluxio meta master + */ + public MetaMasterProxyServiceHandler(MetaMaster metaMaster) { + mMetaMaster = metaMaster; + } + + @Override + public void proxyHeartbeat(ProxyHeartbeatPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + mMetaMaster.proxyHeartbeat(request); + return ProxyHeartbeatPResponse.newBuilder().build(); + }, "proxyHeartbeat", "request=%s", responseObserver, request); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java b/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java new file mode 100644 index 000000000000..3f8aecb121f2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/meta/ProxyInfo.java @@ -0,0 +1,120 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.grpc.NetAddress; +import alluxio.util.CommonUtils; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Proxy information. + */ +@NotThreadSafe +public final class ProxyInfo { + /** Proxy's address. */ + private final NetAddress mAddress; + /** Proxy's last updated time in ms. */ + private long mLastHeartbeatTimeMs; + /** Proxy's start time in ms. */ + private long mStartTimeMs = 0; + /** Proxy's version. */ + private String mVersion = ""; + /** Proxy's revision. 
*/ + private String mRevision = ""; + + /** + * Creates a new instance of {@link ProxyInfo}. + * + * @param address the proxy address to use + */ + public ProxyInfo(NetAddress address) { + mAddress = Preconditions.checkNotNull(address, "address"); + mLastHeartbeatTimeMs = CommonUtils.getCurrentMs(); + } + + /** + * @return the proxy's address + */ + public NetAddress getAddress() { + return mAddress; + } + + /** + * @return the last updated time of the proxy in ms + */ + public long getLastHeartbeatTimeMs() { + return mLastHeartbeatTimeMs; + } + + /** + * @return the start time of the proxy in ms + */ + public long getStartTimeMs() { + return mStartTimeMs; + } + + /** + * @return the version of the proxy + */ + public String getVersion() { + return mVersion; + } + + /** + * @return the revision of the proxy + */ + public String getRevision() { + return mRevision; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("address", mAddress) + .add("lastHeartbeatTimeMs", mLastHeartbeatTimeMs) + .add("startTimeMs", mStartTimeMs) + .add("version", mVersion) + .add("revision", mRevision).toString(); + } + + /** + * @param startTimeMs the start time of the proxy in ms + */ + public void setStartTimeMs(long startTimeMs) { + mStartTimeMs = startTimeMs; + } + + /** + * @param version the version of the proxy + */ + public void setVersion(String version) { + mVersion = version; + } + + /** + * @param revision the revision of the proxy + */ + public void setRevision(String revision) { + mRevision = revision; + } + + /** + * Updates the last updated time of the proxy in ms. 
+ */ + public void updateLastHeartbeatTimeMs() { + mLastHeartbeatTimeMs = CommonUtils.getCurrentMs(); + } +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java b/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java index cb1f90f01963..217feb9befb8 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/AlluxioProxyProcess.java @@ -11,15 +11,23 @@ package alluxio.proxy; +import alluxio.ClientContext; import alluxio.Constants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.grpc.NetAddress; +import alluxio.heartbeat.FixedIntervalSupplier; +import alluxio.heartbeat.HeartbeatContext; +import alluxio.heartbeat.HeartbeatThread; +import alluxio.master.MasterClientContext; import alluxio.util.CommonUtils; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.WaitForOptions; import alluxio.util.network.NetworkAddressUtils; import alluxio.util.network.NetworkAddressUtils.ServiceType; import alluxio.web.ProxyWebServer; import alluxio.web.WebServer; +import alluxio.wire.Address; import org.apache.commons.io.IOUtils; import org.apache.http.HttpResponse; @@ -33,6 +41,8 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeoutException; import javax.annotation.concurrent.NotThreadSafe; @@ -50,6 +60,10 @@ public final class AlluxioProxyProcess implements ProxyProcess { private final long mStartTimeMs; private final CountDownLatch mLatch; + private ProxyMasterSync mMasterSync; + + private ExecutorService mPool = Executors.newFixedThreadPool(1, + ThreadFactoryUtils.build("proxy-routine-%d", true)); /** * Creates an instance of {@link AlluxioProxy}. 
@@ -82,7 +96,19 @@ public void start() throws Exception { // reset proxy web port Configuration.set(PropertyKey.PROXY_WEB_PORT, mWebServer.getLocalPort()); + NetAddress proxyAddress = NetAddress.newBuilder() + .setHost(NetworkAddressUtils.getConnectHost(ServiceType.PROXY_WEB, + Configuration.global())) + .setRpcPort(mWebServer.getLocalPort()).build(); mWebServer.start(); + MasterClientContext context = MasterClientContext.newBuilder(ClientContext.create()).build(); + mMasterSync = new ProxyMasterSync( + Address.fromProto(proxyAddress), context, mStartTimeMs); + mPool.submit(new HeartbeatThread(HeartbeatContext.PROXY_META_MASTER_SYNC, mMasterSync, + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.PROXY_MASTER_HEARTBEAT_INTERVAL)), + Configuration.global(), context.getUserState())); + mLatch.await(); } @@ -92,6 +118,13 @@ public void stop() throws Exception { mWebServer.stop(); mWebServer = null; } + if (mMasterSync != null) { + mMasterSync.close(); + } + if (mPool != null) { + mPool.shutdownNow(); + mPool = null; + } mLatch.countDown(); } diff --git a/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java b/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java new file mode 100644 index 000000000000..6b63f630ed1a --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/ProxyMasterSync.java @@ -0,0 +1,69 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.proxy; + +import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.master.MasterClientContext; +import alluxio.wire.Address; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.Instant; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * The Proxy will maintain a stateless heartbeat with the primary master. + * This enables the admin to list all living Proxy instances in the cluster. + */ +@NotThreadSafe +public final class ProxyMasterSync implements HeartbeatExecutor { + private static final Logger LOG = LoggerFactory.getLogger(ProxyMasterSync.class); + + /** The address of this proxy. */ + private final Address mAddress; + + /** Client for communication with the primary master. */ + private final RetryHandlingMetaMasterProxyClient mMasterClient; + + /** + * Creates a new instance of {@link ProxyMasterSync}. + * + * @param address the proxy address + * @param context the communication context + * @param startTimeMs start time of this instance + */ + public ProxyMasterSync(Address address, MasterClientContext context, long startTimeMs) { + mAddress = address; + mMasterClient = new RetryHandlingMetaMasterProxyClient(mAddress, context, startTimeMs); + LOG.info("Proxy start time is {}", Instant.ofEpochMilli(startTimeMs)); + } + + /** + * Heartbeats to the primary master node. 
+ */ + @Override + public void heartbeat(long timeLimitMs) { + try { + LOG.debug("Heart beating to primary master"); + mMasterClient.proxyHeartbeat(); + } catch (IOException e) { + // Log the error but do not shut down the proxy + LOG.error("Failed to heartbeat to primary master", e); + mMasterClient.disconnect(); + } + } + + @Override + public void close() {} +} diff --git a/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java b/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java new file mode 100644 index 000000000000..03e9dcd846ba --- /dev/null +++ b/core/server/proxy/src/main/java/alluxio/proxy/RetryHandlingMetaMasterProxyClient.java @@ -0,0 +1,95 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.proxy; + +import alluxio.AbstractMasterClient; +import alluxio.Constants; +import alluxio.RuntimeConstants; +import alluxio.conf.PropertyKey; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.MetaMasterProxyServiceGrpc; +import alluxio.grpc.ProxyHeartbeatPOptions; +import alluxio.grpc.ProxyHeartbeatPRequest; +import alluxio.grpc.ServiceType; +import alluxio.master.MasterClientContext; +import alluxio.wire.Address; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +/** + * A wrapper for the gRPC client to interact with the meta master. 
+ */ +@ThreadSafe +public class RetryHandlingMetaMasterProxyClient extends AbstractMasterClient { + private static final Logger RPC_LOG = + LoggerFactory.getLogger(RetryHandlingMetaMasterProxyClient.class); + private MetaMasterProxyServiceGrpc.MetaMasterProxyServiceBlockingStub mClient = null; + private final Address mProxyAddress; + private final long mStartTimeMs; + + /** + * Creates a new meta master client. + * + * @param proxyAddress address of the proxy + * @param conf master client configuration + * @param startTimeMs start timestamp + */ + public RetryHandlingMetaMasterProxyClient( + Address proxyAddress, MasterClientContext conf, long startTimeMs) { + super(conf); + mProxyAddress = proxyAddress; + mStartTimeMs = startTimeMs; + } + + @Override + protected ServiceType getRemoteServiceType() { + return ServiceType.META_MASTER_PROXY_SERVICE; + } + + @Override + protected String getServiceName() { + return Constants.META_MASTER_PROXY_SERVICE_NAME; + } + + @Override + protected long getServiceVersion() { + return Constants.META_MASTER_PROXY_SERVICE_VERSION; + } + + @Override + protected void afterConnect() { + mClient = MetaMasterProxyServiceGrpc.newBlockingStub(mChannel); + } + + /** + * Sends a heartbeat to the primary master. 
+ */ + public void proxyHeartbeat() throws IOException { + BuildVersion version = BuildVersion.newBuilder().setVersion(RuntimeConstants.VERSION) + .setRevision(RuntimeConstants.REVISION_SHORT).build(); + ProxyHeartbeatPOptions options = ProxyHeartbeatPOptions.newBuilder() + .setProxyAddress(mProxyAddress.toProto()) + .setStartTime(mStartTimeMs) + .setVersion(version).build(); + retryRPC(() -> mClient.withDeadlineAfter( + mContext.getClusterConf().getMs( + PropertyKey.USER_RPC_RETRY_MAX_DURATION), TimeUnit.MILLISECONDS) + .proxyHeartbeat(ProxyHeartbeatPRequest.newBuilder().setOptions(options).build()), + RPC_LOG, "ProxyHeartbeat", "options=%s", options); + } +} + diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index b7c3358232c6..f7d89ebb8bdb 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -283,12 +283,6 @@ message GetRegisterLeasePResponse { optional GetRegisterLeasePOptions options = 4; } -message BuildVersion { - /** the project version of the worker */ - optional string version = 1; - /** the git revision at the time of building the worker */ - optional string revision = 2; -} message RegisterWorkerPOptions { repeated grpc.ConfigProperty configs = 1; /** the worker version to display in info pages (useful for rolling upgrades) */ diff --git a/core/transport/src/main/proto/grpc/common.proto b/core/transport/src/main/proto/grpc/common.proto index b0aa380a75be..67dbd85fd3be 100644 --- a/core/transport/src/main/proto/grpc/common.proto +++ b/core/transport/src/main/proto/grpc/common.proto @@ -147,3 +147,10 @@ enum ErrorType { Internal = 1; External = 2; } + +message BuildVersion { + /** the project version */ + optional string version = 1; + /** the git revision at the time of building */ + optional string revision = 2; +} diff --git a/core/transport/src/main/proto/grpc/meta_master.proto 
b/core/transport/src/main/proto/grpc/meta_master.proto index 6790b05c3144..5a408c68714a 100644 --- a/core/transport/src/main/proto/grpc/meta_master.proto +++ b/core/transport/src/main/proto/grpc/meta_master.proto @@ -148,6 +148,25 @@ message BackupStatusPRequest { optional string backupId = 1; } +message ProxyStatus { + optional grpc.NetAddress address = 1; + optional string state = 2; + optional int64 startTime = 3; + optional int64 lastHeartbeatTime = 4; + optional grpc.BuildVersion version = 5; +} + +message ListProxyStatusPRequest { + optional ListProxyStatusPOptions options = 1; +} + +message ListProxyStatusPOptions { +} + +message ListProxyStatusPResponse { + repeated ProxyStatus proxyStatuses = 1; +} + /** * This interface contains meta master service endpoints for Alluxio clients. */ @@ -176,6 +195,11 @@ service MetaMasterClientService { * Creates a checkpoint in the primary master journal system. */ rpc Checkpoint(CheckpointPOptions) returns (CheckpointPResponse); + + /** + * Returns the status of all known Proxy instances in the cluster. + */ + rpc ListProxyStatus(ListProxyStatusPRequest) returns (ListProxyStatusPResponse); } message SetPathConfigurationPOptions {} @@ -296,3 +320,24 @@ service MetaMasterMasterService { */ rpc MasterHeartbeat(MasterHeartbeatPRequest) returns (MasterHeartbeatPResponse); } + +message ProxyHeartbeatPOptions { + optional grpc.NetAddress proxyAddress = 1; + optional int64 startTime = 2; + optional grpc.BuildVersion version = 3; +} +message ProxyHeartbeatPRequest { + optional ProxyHeartbeatPOptions options = 1; +} +message ProxyHeartbeatPResponse { +} + +/** + * This interface contains meta master service endpoints for Alluxio Proxy instances. + */ +service MetaMasterProxyService { + /** + * Stateless heartbeat from proxy instances to report the current status. 
+ */ + rpc ProxyHeartbeat(ProxyHeartbeatPRequest) returns (ProxyHeartbeatPResponse); +} diff --git a/core/transport/src/main/proto/grpc/version.proto b/core/transport/src/main/proto/grpc/version.proto index 3d5011cff33a..9a8104b95ac2 100644 --- a/core/transport/src/main/proto/grpc/version.proto +++ b/core/transport/src/main/proto/grpc/version.proto @@ -22,6 +22,7 @@ enum ServiceType { META_MASTER_CONFIG_SERVICE = 6; META_MASTER_CLIENT_SERVICE = 7; META_MASTER_MASTER_SERVICE = 8; + META_MASTER_PROXY_SERVICE = 18; METRICS_MASTER_CLIENT_SERVICE = 9; JOB_MASTER_CLIENT_SERVICE = 10; JOB_MASTER_WORKER_SERVICE = 11; diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 12d48a5d5116..e256cf1d00c3 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -705,21 +705,6 @@ } ] }, - { - "name": "BuildVersion", - "fields": [ - { - "id": 1, - "name": "version", - "type": "string" - }, - { - "id": 2, - "name": "revision", - "type": "string" - } - ] - }, { "name": "RegisterWorkerPOptions", "fields": [ @@ -1905,6 +1890,21 @@ "type": "ErrorType" } ] + }, + { + "name": "BuildVersion", + "fields": [ + { + "id": 1, + "name": "version", + "type": "string" + }, + { + "id": 2, + "name": "revision", + "type": "string" + } + ] } ], "package": { @@ -6168,6 +6168,60 @@ } ] }, + { + "name": "ProxyStatus", + "fields": [ + { + "id": 1, + "name": "address", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "state", + "type": "string" + }, + { + "id": 3, + "name": "startTime", + "type": "int64" + }, + { + "id": 4, + "name": "lastHeartbeatTime", + "type": "int64" + }, + { + "id": 5, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "ListProxyStatusPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "ListProxyStatusPOptions" + } + ] + }, + { + "name": "ListProxyStatusPOptions" + }, + { + "name": "ListProxyStatusPResponse", + "fields": [ + { + "id": 1, 
+ "name": "proxyStatuses", + "type": "ProxyStatus", + "is_repeated": true + } + ] + }, { "name": "SetPathConfigurationPOptions" }, @@ -6386,6 +6440,39 @@ } } ] + }, + { + "name": "ProxyHeartbeatPOptions", + "fields": [ + { + "id": 1, + "name": "proxyAddress", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "startTime", + "type": "int64" + }, + { + "id": 3, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "ProxyHeartbeatPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "ProxyHeartbeatPOptions" + } + ] + }, + { + "name": "ProxyHeartbeatPResponse" } ], "services": [ @@ -6416,6 +6503,11 @@ "name": "Checkpoint", "in_type": "CheckpointPOptions", "out_type": "CheckpointPResponse" + }, + { + "name": "ListProxyStatus", + "in_type": "ListProxyStatusPRequest", + "out_type": "ListProxyStatusPResponse" } ] }, @@ -6468,6 +6560,16 @@ "out_type": "MasterHeartbeatPResponse" } ] + }, + { + "name": "MetaMasterProxyService", + "rpcs": [ + { + "name": "ProxyHeartbeat", + "in_type": "ProxyHeartbeatPRequest", + "out_type": "ProxyHeartbeatPResponse" + } + ] } ], "imports": [ @@ -8374,6 +8476,10 @@ "name": "META_MASTER_MASTER_SERVICE", "integer": 8 }, + { + "name": "META_MASTER_PROXY_SERVICE", + "integer": 18 + }, { "name": "METRICS_MASTER_CLIENT_SERVICE", "integer": 9 diff --git a/shell/src/main/java/alluxio/cli/fsadmin/command/ReportCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/command/ReportCommand.java index cd9739feafc9..28127f8400bb 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/command/ReportCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/command/ReportCommand.java @@ -17,6 +17,7 @@ import alluxio.cli.fsadmin.report.CapacityCommand; import alluxio.cli.fsadmin.report.JobServiceMetricsCommand; import alluxio.cli.fsadmin.report.MetricsCommand; +import alluxio.cli.fsadmin.report.ProxyCommand; import alluxio.cli.fsadmin.report.SummaryCommand; import alluxio.cli.fsadmin.report.UfsCommand; import 
alluxio.conf.AlluxioConfiguration; @@ -81,7 +82,8 @@ enum Command { METRICS, // Report metrics information SUMMARY, // Report cluster summary UFS, // Report under filesystem information - JOBSERVICE // Report job service metrics information + JOBSERVICE, // Report job service metrics information + PROXY // Report proxy information in the cluster } private AlluxioConfiguration mConf; @@ -133,6 +135,9 @@ public int run(CommandLine cl) throws IOException { case "jobservice": command = Command.JOBSERVICE; break; + case "proxy": + command = Command.PROXY; + break; default: System.out.println(getUsage()); System.out.println(getDescription()); @@ -173,6 +178,10 @@ public int run(CommandLine cl) throws IOException { mJobMasterClient, mPrintStream, mConf.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); jobmetricsCommand.run(); break; + case PROXY: + ProxyCommand proxyCommand = new ProxyCommand(mMetaClient, mPrintStream); + proxyCommand.run(); + break; default: break; } diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/ProxyCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/ProxyCommand.java new file mode 100644 index 000000000000..b5bdb6a160b4 --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/ProxyCommand.java @@ -0,0 +1,98 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.cli.fsadmin.report; + +import alluxio.client.meta.MetaMasterClient; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.NetAddress; +import alluxio.grpc.ProxyStatus; + +import java.io.IOException; +import java.io.PrintStream; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.util.List; +import java.util.Locale; + +/** + * Prints information about proxy instances in the cluster. + */ +public class ProxyCommand { + private final MetaMasterClient mMetaMasterClient; + private final PrintStream mPrintStream; + + public static final DateTimeFormatter DATETIME_FORMAT = + DateTimeFormatter.ofLocalizedDateTime(FormatStyle.SHORT).ofPattern("yyyyMMdd-HHmmss") + .withLocale(Locale.getDefault()).withZone(ZoneId.systemDefault()); + + /** + * Creates a new instance of {@link ProxyCommand}. + * + * @param metaMasterClient the client to talk to the master with + * @param printStream the stream to print to + */ + public ProxyCommand(MetaMasterClient metaMasterClient, PrintStream printStream) { + mMetaMasterClient = metaMasterClient; + mPrintStream = printStream; + } + + /** + * Runs a proxy report command. 
+ * + * @return 0 on success, 1 otherwise + */ + public int run() throws IOException { + String[] header = new String[]{"Address", "State", "Start Time", "Last Heartbeat Time", + "Version", "Revision"}; + + try { + List allProxyStatus = mMetaMasterClient.listProxyStatus(); + int liveCount = 0; + int lostCount = 0; + int maxAddressLength = 24; + for (ProxyStatus proxyStatus : allProxyStatus) { + String state = proxyStatus.getState(); + if (state.equals("ACTIVE")) { + liveCount++; + } else if (state.equals("LOST")) { + lostCount++; + } + NetAddress address = proxyStatus.getAddress(); + String addressStr = address.getHost() + ":" + address.getRpcPort(); + if (maxAddressLength < addressStr.length()) { + maxAddressLength = addressStr.length(); + } + } + mPrintStream.printf("%s Proxy instances in the cluster, %s serving and %s lost%n%n", + liveCount + lostCount, liveCount, lostCount); + + String format = "%-" + maxAddressLength + "s %-8s %-16s %-20s %-32s %-8s%n"; + mPrintStream.printf(format, header); + for (ProxyStatus proxyStatus : allProxyStatus) { + NetAddress address = proxyStatus.getAddress(); + BuildVersion version = proxyStatus.getVersion(); + mPrintStream.printf(format, + address.getHost() + ":" + address.getRpcPort(), + proxyStatus.getState(), + DATETIME_FORMAT.format(Instant.ofEpochMilli(proxyStatus.getStartTime())), + DATETIME_FORMAT.format(Instant.ofEpochMilli(proxyStatus.getLastHeartbeatTime())), + version.getVersion(), version.getRevision()); + } + return 0; + } catch (Exception e) { + e.printStackTrace(); + return 1; + } + } +} diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/ProxyCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/ProxyCommandTest.java new file mode 100644 index 000000000000..ac61970062cb --- /dev/null +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/ProxyCommandTest.java @@ -0,0 +1,147 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.cli.fsadmin.report; + +import alluxio.client.meta.RetryHandlingMetaMasterClient; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.NetAddress; +import alluxio.grpc.ProxyStatus; + +import org.hamcrest.collection.IsIterableContainingInOrder; +import org.joda.time.Instant; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class ProxyCommandTest { + private RetryHandlingMetaMasterClient mMetaMasterClient; + + @Before + public void prepareDependencies() throws IOException { + mMetaMasterClient = Mockito.mock(RetryHandlingMetaMasterClient.class); + } + + @Test + public void listProxyInstances() throws IOException { + List longInfoList = prepareInfoList(); + Mockito.when(mMetaMasterClient.listProxyStatus()) + .thenReturn(longInfoList); + + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream printStream = new PrintStream(outputStream, true, "utf-8")) { + ProxyCommand proxyCommand = new ProxyCommand(mMetaMasterClient, printStream); + proxyCommand.run(); + String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); + // CHECKSTYLE.OFF: LineLengthExceed - Much more readable + List expectedOutput = Arrays.asList("2 Proxy instances in the cluster, 1 serving and 1 lost", + "", + "Address State Start Time Last Heartbeat 
Time Version Revision", + "proxy-0:12345 ACTIVE 20230421-182944 20230421-183005 1.0 abc ", + "proxy-1:23456 LOST 20230421-182707 20230421-190507 1.1 abc "); + // CHECKSTYLE.ON: LineLengthExceed + List testOutput = Arrays.asList(output.split("\n")); + Assert.assertThat(testOutput, + IsIterableContainingInOrder.contains(expectedOutput.toArray())); + } + } + + @Test + public void listProxyInstancesLongName() throws IOException { + List longInfoList = prepareInfoListLongName(); + Mockito.when(mMetaMasterClient.listProxyStatus()) + .thenReturn(longInfoList); + + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + PrintStream printStream = new PrintStream(outputStream, true, "utf-8")) { + ProxyCommand proxyCommand = new ProxyCommand(mMetaMasterClient, printStream); + proxyCommand.run(); + String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); + // CHECKSTYLE.OFF: LineLengthExceed - Much more readable + List expectedOutput = Arrays.asList("2 Proxy instances in the cluster, 1 serving and 1 lost", + "", + "Address State Start Time Last Heartbeat Time Version Revision", + "datacenter-namespace-department-proxy-0:12345 ACTIVE 20230421-182944 20230421-183005 1.0 abc ", + "datacenter-namespace-department-proxy-1:23456 LOST 20230421-182707 20230421-190507 1.1 abc "); + // CHECKSTYLE.ON: LineLengthExceed + List testOutput = Arrays.asList(output.split("\n")); + Assert.assertThat(testOutput, + IsIterableContainingInOrder.contains(expectedOutput.toArray())); + } + } + + /** + * @return long worker info list to test + */ + private List prepareInfoList() { + List infoList = new ArrayList<>(); + + long proxy0startTime = Instant.parse("2023-04-21T18:29:44").getMillis(); + long proxy0HeartbeatTime = Instant.parse("2023-04-21T18:30:05").getMillis(); + NetAddress address0 = NetAddress.newBuilder().setHost("proxy-0").setRpcPort(12345).build(); + BuildVersion version0 = BuildVersion.newBuilder().setVersion("1.0").setRevision("abc").build(); + 
ProxyStatus proxy0 = ProxyStatus.newBuilder() + .setAddress(address0).setState("ACTIVE") + .setStartTime(proxy0startTime) + .setLastHeartbeatTime(proxy0HeartbeatTime) + .setVersion(version0).build(); + infoList.add(proxy0); + + long proxy1startTime = Instant.parse("2023-04-21T18:27:07").getMillis(); + long proxy1HeartbeatTime = Instant.parse("2023-04-21T19:05:07").getMillis(); + NetAddress address1 = NetAddress.newBuilder().setHost("proxy-1").setRpcPort(23456).build(); + BuildVersion version1 = BuildVersion.newBuilder().setVersion("1.1").setRevision("abc").build(); + ProxyStatus proxy1 = ProxyStatus.newBuilder() + .setAddress(address1).setState("LOST") + .setStartTime(proxy1startTime) + .setLastHeartbeatTime(proxy1HeartbeatTime) + .setVersion(version1).build(); + infoList.add(proxy1); + + return infoList; + } + + private List prepareInfoListLongName() { + List infoList = new ArrayList<>(); + + long proxy0startTime = Instant.parse("2023-04-21T18:29:44").getMillis(); + long proxy0HeartbeatTime = Instant.parse("2023-04-21T18:30:05").getMillis(); + NetAddress address0 = NetAddress.newBuilder() + .setHost("datacenter-namespace-department-proxy-0").setRpcPort(12345).build(); + BuildVersion version0 = BuildVersion.newBuilder().setVersion("1.0").setRevision("abc").build(); + ProxyStatus proxy0 = ProxyStatus.newBuilder().setAddress(address0).setState("ACTIVE") + .setStartTime(proxy0startTime).setLastHeartbeatTime(proxy0HeartbeatTime) + .setVersion(version0).build(); + infoList.add(proxy0); + + long proxy1startTime = Instant.parse("2023-04-21T18:27:07").getMillis(); + long proxy1HeartbeatTime = Instant.parse("2023-04-21T19:05:07").getMillis(); + NetAddress address1 = NetAddress.newBuilder() + .setHost("datacenter-namespace-department-proxy-1").setRpcPort(23456).build(); + BuildVersion version1 = BuildVersion.newBuilder().setVersion("1.1").setRevision("abc").build(); + ProxyStatus proxy1 = ProxyStatus.newBuilder().setAddress(address1).setState("LOST") + 
.setStartTime(proxy1startTime).setLastHeartbeatTime(proxy1HeartbeatTime) + .setVersion(version1).build(); + infoList.add(proxy1); + + return infoList; + } +} + From 092703f860ca6576c859c000a966c1dca5231b07 Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Tue, 25 Apr 2023 10:55:27 +0800 Subject: [PATCH 247/334] Fix infinite recursion with RefCountedNioByteBuf.setBytes ### What changes are proposed in this pull request? Fix an infinite recursion with `RefCountedNioByteBuf.setBytes` which causes stack overflow error. ### Why are the changes needed? The current implementation of `RefCountedNioByteBuf.setBytes(int index, ByteBuf src, int srcIndex, int length)` delegates the copying of bytes to the `getBytes(int index, ByteBuf dst, int dstIndex, int length)` method of the source buffer. In some implementations of a direct `ByteBuf`, e.g. [`io.netty.buffer.UnpooledUnsafeDirectByteBuf`](https://github.com/netty/netty/blob/d773f37e3422b8bc38429bbde94583173c3b7e4a/buffer/src/main/java/io/netty/buffer/UnpooledUnsafeDirectByteBuf.java), the `getBytes(int index, ByteBuf src, int srcIndex, int length)` method in turn delegates the call back to the `setBytes` method of the destination buffer ([here](https://github.com/netty/netty/blob/d773f37e3422b8bc38429bbde94583173c3b7e4a/buffer/src/main/java/io/netty/buffer/UnpooledUnsafeDirectByteBuf.java#L159) and [here](https://github.com/netty/netty/blob/d773f37e3422b8bc38429bbde94583173c3b7e4a/buffer/src/main/java/io/netty/buffer/UnsafeByteBufUtil.java#L464)). This causes an infinite recursion. 
Error stack when stack overflow: ``` Exception in thread "main" java.lang.StackOverflowError at io.netty.buffer.AbstractByteBuf.ensureAccessible(AbstractByteBuf.java:1488) at alluxio.network.protocol.databuffer.RefCountedNioByteBuf.ensureIndexInBounds(RefCountedNioByteBuf.java:418) at alluxio.network.protocol.databuffer.RefCountedNioByteBuf.setBytes(RefCountedNioByteBuf.java:283) at alluxio.network.protocol.databuffer.PooledDirectNioByteBuf.setBytes(PooledDirectNioByteBuf.java:22) at io.netty.buffer.UnsafeByteBufUtil.getBytes(UnsafeByteBufUtil.java:476) at io.netty.buffer.PooledUnsafeDirectByteBuf.getBytes(PooledUnsafeDirectByteBuf.java:124) at alluxio.network.protocol.databuffer.RefCountedNioByteBuf.setBytes(RefCountedNioByteBuf.java:284) at alluxio.network.protocol.databuffer.PooledDirectNioByteBuf.setBytes(PooledDirectNioByteBuf.java:22) at io.netty.buffer.UnsafeByteBufUtil.getBytes(UnsafeByteBufUtil.java:476) at io.netty.buffer.PooledUnsafeDirectByteBuf.getBytes(PooledUnsafeDirectByteBuf.java:124) at alluxio.network.protocol.databuffer.RefCountedNioByteBuf.setBytes(RefCountedNioByteBuf.java:284) at alluxio.network.protocol.databuffer.PooledDirectNioByteBuf.setBytes(PooledDirectNioByteBuf.java:22) ... ``` An unit test is added to cover this case. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#17311 change-id: cid-3f9afa7df0b8cddb8c127c40bbfeb240bdc93553 --- .../protocol/databuffer/RefCountedNioByteBuf.java | 4 +++- .../databuffer/RefCountedNioByteBufTest.java | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java b/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java index 1ff14a53fab9..58536d6ea07b 100644 --- a/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java +++ b/core/common/src/main/java/alluxio/network/protocol/databuffer/RefCountedNioByteBuf.java @@ -281,7 +281,9 @@ public int getBytes(int index, FileChannel out, long position, int length) throw @Override public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { ensureIndexInBounds(srcIndex, src.capacity(), index, capacity(), length); - src.getBytes(srcIndex, this, index, length); + ByteBuffer dup = mDelegate.duplicate(); + dup.position(index).limit(index + length); + src.getBytes(srcIndex, dup); return this; } diff --git a/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java b/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java index bfa2d76d6264..955c9d9b7a73 100644 --- a/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java +++ b/core/common/src/test/java/alluxio/network/protocol/databuffer/RefCountedNioByteBufTest.java @@ -12,10 +12,13 @@ package alluxio.network.protocol.databuffer; import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.util.io.BufferUtils; import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Suite; @@ -55,6 +58,18 @@ public void invalidCapacity() { 
assertThrows(IllegalArgumentException.class, () -> buf.capacity(10)); } + + @Test + public void setBytesWithAnotherByteBuf() { + ByteBuf srcBuf = Unpooled.directBuffer(100); + srcBuf.setBytes(0, BufferUtils.getIncreasingByteArray(100)); + ByteBuf dstBuf = new LeakyByteBuf(ByteBuffer.allocateDirect(100), 100, 100); + final int offset = 42; + final int length = 17; + dstBuf.setBytes(0, srcBuf, offset, length); + assertTrue(BufferUtils.equalIncreasingByteBuffer( + offset, length, dstBuf.slice(0, length).nioBuffer())); + } } private static class LeakyByteBuf extends RefCountedNioByteBuf { From 8da59539201cff376c33256846bd565d976a148e Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Tue, 25 Apr 2023 22:36:30 +0800 Subject: [PATCH 248/334] Fix JobServiceMetricsCommandTest Cherry-pick of existing commit. orig-pr: TachyonNexus/enterprise#4063 orig-commit: TachyonNexus/enterprise@e39e948e567d8ec5e05079a9b54001c6900e818d orig-commit-author: Jiacheng Liu pr-link: Alluxio/alluxio#17314 change-id: cid-c3cae8f23c6b5695b6908217e815f09fe5edb625 --- .../src/main/java/alluxio/Constants.java | 2 + .../main/java/alluxio/RuntimeConstants.java | 7 + .../main/java/alluxio/conf/PropertyKey.java | 27 ++ .../ServiceVersionClientServiceHandler.java | 3 + .../alluxio/heartbeat/HeartbeatContext.java | 4 + .../master/AlluxioSimpleMasterProcess.java | 8 +- .../src/main/proto/grpc/job_master.proto | 81 ++++++ .../src/main/proto/grpc/version.proto | 1 + core/transport/src/main/proto/proto.lock | 198 ++++++++++++++ .../alluxio/client/job/JobMasterClient.java | 8 + .../job/RetryHandlingJobMasterClient.java | 12 + .../java/alluxio/job/MasterWorkerInfo.java | 27 ++ .../alluxio/job/wire/JobWorkerHealth.java | 40 ++- .../java/alluxio/master/job/JobMaster.java | 244 +++++++++++++++++- .../job/JobMasterClientServiceHandler.java | 19 ++ .../job/JobMasterWorkerServiceHandler.java | 9 +- .../alluxio/master/meta/JobMasterInfo.java | 132 ++++++++++ .../meta/JobMasterMasterServiceHandler.java | 77 ++++++ 
.../alluxio/master/meta/JobMasterSync.java | 116 +++++++++ .../RetryHandlingJobMasterMasterClient.java | 136 ++++++++++ .../job/RetryHandlingJobMasterClient.java | 5 +- .../report/JobServiceMetricsCommand.java | 70 ++++- .../report/JobServiceMetricsCommandTest.java | 59 ++++- 23 files changed, 1257 insertions(+), 28 deletions(-) create mode 100644 job/server/src/main/java/alluxio/master/meta/JobMasterInfo.java create mode 100644 job/server/src/main/java/alluxio/master/meta/JobMasterMasterServiceHandler.java create mode 100644 job/server/src/main/java/alluxio/master/meta/JobMasterSync.java create mode 100644 job/server/src/main/java/alluxio/master/meta/RetryHandlingJobMasterMasterClient.java diff --git a/core/common/src/main/java/alluxio/Constants.java b/core/common/src/main/java/alluxio/Constants.java index 3a7dcf5b9019..80ca29f23721 100644 --- a/core/common/src/main/java/alluxio/Constants.java +++ b/core/common/src/main/java/alluxio/Constants.java @@ -96,6 +96,7 @@ public final class Constants { public static final long META_MASTER_CLIENT_SERVICE_VERSION = 2; public static final long META_MASTER_MASTER_SERVICE_VERSION = 1; public static final long META_MASTER_PROXY_SERVICE_VERSION = 1; + public static final long JOB_MASTER_MASTER_SERVICE_VERSION = 1; public static final long METRICS_MASTER_CLIENT_SERVICE_VERSION = 2; public static final long JOURNAL_MASTER_CLIENT_SERVICE_VERSION = 1; public static final long RAFT_JOURNAL_SERVICE_VERSION = 1; @@ -120,6 +121,7 @@ public final class Constants { public static final String META_MASTER_CLIENT_SERVICE_NAME = "MetaMaster"; public static final String META_MASTER_PROXY_SERVICE_NAME = "MetaMasterProxy"; public static final String META_MASTER_MASTER_SERVICE_NAME = "MetaMasterMaster"; + public static final String JOB_MASTER_MASTER_SERVICE_NAME = "JobMasterMaster"; public static final String METRICS_MASTER_CLIENT_SERVICE_NAME = "MetricsMasterClient"; public static final String BLOCK_WORKER_CLIENT_SERVICE_NAME = 
"BlockWorkerClient"; public static final String FILE_SYSTEM_WORKER_CLIENT_SERVICE_NAME = "FileSystemWorkerClient"; diff --git a/core/common/src/main/java/alluxio/RuntimeConstants.java b/core/common/src/main/java/alluxio/RuntimeConstants.java index 8c0e21cfb696..a59d9d0aad63 100644 --- a/core/common/src/main/java/alluxio/RuntimeConstants.java +++ b/core/common/src/main/java/alluxio/RuntimeConstants.java @@ -11,6 +11,8 @@ package alluxio; +import alluxio.grpc.BuildVersion; + import javax.annotation.concurrent.ThreadSafe; /** @@ -40,6 +42,11 @@ public final class RuntimeConstants { ? ProjectConstants.REVISION.substring(0, 8) : ProjectConstants.REVISION; public static final String VERSION_AND_REVISION_SHORT = VERSION + "-" + REVISION_SHORT; + public static final BuildVersion UNKNOWN_VERSION_INFO = BuildVersion.newBuilder() + .setVersion("UNKNOWN").setRevision("UNKNOWN").build(); + public static final BuildVersion CURRENT_VERSION_INFO = BuildVersion.newBuilder() + .setVersion(RuntimeConstants.VERSION) + .setRevision(RuntimeConstants.REVISION_SHORT).build(); /** The relative path to the Alluxio target jar. 
*/ public static final String ALLUXIO_JAR = "target/alluxio-" + VERSION diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 2f1421513512..9d6f1150c446 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7285,6 +7285,20 @@ public String toString() { .setDefaultValue(100000) .setScope(Scope.MASTER) .build(); + public static final PropertyKey JOB_MASTER_MASTER_HEARTBEAT_INTERVAL = + durationBuilder(Name.JOB_MASTER_MASTER_HEARTBEAT_INTERVAL) + .setDescription("The amount of time that a standby Alluxio Job Master should wait " + + "in between heartbeats to the primary Job Master.") + .setDefaultValue("1sec") + .setScope(Scope.MASTER) + .build(); + public static final PropertyKey JOB_MASTER_MASTER_TIMEOUT = + durationBuilder(Name.JOB_MASTER_MASTER_TIMEOUT) + .setDescription("The time period after which the primary Job Master will mark a standby " + + "as lost without a subsequent heartbeat.") + .setDefaultValue("60sec") + .setScope(Scope.MASTER) + .build(); public static final PropertyKey JOB_MASTER_WORKER_HEARTBEAT_INTERVAL = durationBuilder(Name.JOB_MASTER_WORKER_HEARTBEAT_INTERVAL) .setDescription("The amount of time that the Alluxio job worker should wait in between " @@ -7311,6 +7325,13 @@ public String toString() { .setDefaultValue(format("${%s}", Name.MASTER_HOSTNAME)) .setScope(Scope.ALL) .build(); + public static final PropertyKey JOB_MASTER_LOST_MASTER_INTERVAL = + durationBuilder(Name.JOB_MASTER_LOST_MASTER_INTERVAL) + .setDescription("The time interval the job master waits between checks for " + + "lost job masters.") + .setDefaultValue("10sec") + .setScope(Scope.MASTER) + .build(); public static final PropertyKey JOB_MASTER_LOST_WORKER_INTERVAL = durationBuilder(Name.JOB_MASTER_LOST_WORKER_INTERVAL) .setDescription("The time interval the job master waits between checks for lost workers.") @@ 
-9073,6 +9094,10 @@ public static final class Name { public static final String JOB_MASTER_FINISHED_JOB_RETENTION_TIME = "alluxio.job.master.finished.job.retention.time"; public static final String JOB_MASTER_JOB_CAPACITY = "alluxio.job.master.job.capacity"; + public static final String JOB_MASTER_MASTER_HEARTBEAT_INTERVAL = + "alluxio.job.master.master.heartbeat.interval"; + public static final String JOB_MASTER_MASTER_TIMEOUT = + "alluxio.job.master.master.timeout"; public static final String JOB_MASTER_WORKER_HEARTBEAT_INTERVAL = "alluxio.job.master.worker.heartbeat.interval"; public static final String JOB_MASTER_WORKER_TIMEOUT = @@ -9080,6 +9105,8 @@ public static final class Name { public static final String JOB_MASTER_BIND_HOST = "alluxio.job.master.bind.host"; public static final String JOB_MASTER_HOSTNAME = "alluxio.job.master.hostname"; + public static final String JOB_MASTER_LOST_MASTER_INTERVAL = + "alluxio.job.master.lost.master.interval"; public static final String JOB_MASTER_LOST_WORKER_INTERVAL = "alluxio.job.master.lost.worker.interval"; public static final String JOB_MASTER_RPC_PORT = "alluxio.job.master.rpc.port"; diff --git a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java index b889f972c698..18fe8b6c1600 100644 --- a/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java +++ b/core/common/src/main/java/alluxio/grpc/ServiceVersionClientServiceHandler.java @@ -109,6 +109,9 @@ public void getServiceVersion(GetServiceVersionPRequest request, case JOB_MASTER_WORKER_SERVICE: serviceVersion = Constants.JOB_MASTER_WORKER_SERVICE_VERSION; break; + case JOB_MASTER_MASTER_SERVICE: + serviceVersion = Constants.JOB_MASTER_MASTER_SERVICE_VERSION; + break; case JOURNAL_MASTER_CLIENT_SERVICE: serviceVersion = Constants.JOURNAL_MASTER_CLIENT_SERVICE_VERSION; break; diff --git 
a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java index 7daf23012c6e..b77444e354e6 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatContext.java @@ -32,6 +32,8 @@ public final class HeartbeatContext { // Names of different heartbeat executors. public static final String FUSE_UPDATE_CHECK = "Fuse update check"; public static final String JOB_MASTER_LOST_WORKER_DETECTION = "Job Master Lost Worker Detection"; + public static final String JOB_MASTER_LOST_MASTER_DETECTION = "Job Master Lost Master Detection"; + public static final String JOB_MASTER_SYNC = "Job Master Sync"; public static final String JOB_WORKER_COMMAND_HANDLING = "Job Worker Command Handling"; public static final String MASTER_THROTTLE = "Master Throttle"; @@ -126,6 +128,8 @@ private HeartbeatType(int value) { sTimerClasses = new HashMap<>(); sTimerClasses.put(MASTER_THROTTLE, SLEEPING_TIMER_CLASS); sTimerClasses.put(JOB_MASTER_LOST_WORKER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(JOB_MASTER_LOST_MASTER_DETECTION, SLEEPING_TIMER_CLASS); + sTimerClasses.put(JOB_MASTER_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(JOB_WORKER_COMMAND_HANDLING, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_ACTIVE_UFS_SYNC, SLEEPING_TIMER_CLASS); sTimerClasses.put(MASTER_BLOCK_INTEGRITY_CHECK, SLEEPING_TIMER_CLASS); diff --git a/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java b/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java index 4626e0d57149..7950dde7335a 100644 --- a/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java +++ b/core/server/master/src/main/java/alluxio/master/AlluxioSimpleMasterProcess.java @@ -80,12 +80,16 @@ public void start() throws Exception { mLeaderSelector.start(getRpcAddress()); while (!Thread.interrupted()) { + // Start the 
master components in standby mode + // Eg. for job master they are the JobMaster and JournalMaster + startMasterComponents(false); + LOG.info("Standby started"); // We are in standby mode. Nothing to do until we become the primary. mLeaderSelector.waitForState(NodeState.PRIMARY); LOG.info("Transitioning from standby to primary"); mJournalSystem.gainPrimacy(); stopMasterComponents(); - LOG.info("Secondary stopped"); + LOG.info("Standby stopped"); startMasterComponents(true); mServices.forEach(SimpleService::promote); LOG.info("Primary started"); @@ -96,8 +100,6 @@ public void start() throws Exception { stopMasterComponents(); mJournalSystem.losePrimacy(); LOG.info("Primary stopped"); - startMasterComponents(false); - LOG.info("Standby started"); } } diff --git a/core/transport/src/main/proto/grpc/job_master.proto b/core/transport/src/main/proto/grpc/job_master.proto index ad5f553cf7de..7f2ee9be8182 100644 --- a/core/transport/src/main/proto/grpc/job_master.proto +++ b/core/transport/src/main/proto/grpc/job_master.proto @@ -107,6 +107,7 @@ message JobWorkerHealth { optional int32 taskPoolSize = 5; optional int32 numActiveTasks = 6; optional int32 unfinishedTasks = 7; + optional grpc.BuildVersion version = 8; } message JobCommand { @@ -197,6 +198,21 @@ message GetAllWorkerHealthPResponse { repeated JobWorkerHealth workerHealths = 1; } +message JobMasterStatus { + optional string state = 1; + optional grpc.NetAddress masterAddress = 2; + optional int64 startTime = 3; + optional grpc.BuildVersion version = 4; +} + +message GetAllMasterStatusPOptions {} +message GetAllMasterStatusPRequest { + optional GetAllMasterStatusPOptions options = 1; +} +message GetAllMasterStatusPResponse { + repeated JobMasterStatus jobMasterStatus = 1; +} + message SubmitOptions {} message SubmitRequest { optional bytes cmdConfig = 1; @@ -275,6 +291,11 @@ service JobMasterClientService { */ rpc GetAllWorkerHealth(GetAllWorkerHealthPRequest) returns (GetAllWorkerHealthPResponse); + /** + * Lists 
all job master status. + */ + rpc GetAllMasterStatus(GetAllMasterStatusPRequest) returns (GetAllMasterStatusPResponse); + /** * Submit a CMD job, return a jobControlId. */ @@ -305,6 +326,7 @@ message RegisterJobWorkerPOptions {} message RegisterJobWorkerPRequest { optional grpc.WorkerNetAddress workerNetAddress = 1; optional RegisterJobWorkerPOptions options = 2; + optional grpc.BuildVersion version = 3; } message RegisterJobWorkerPResponse { optional int64 id = 1; @@ -325,3 +347,62 @@ service JobMasterWorkerService { */ rpc RegisterJobWorker(RegisterJobWorkerPRequest) returns (RegisterJobWorkerPResponse); } + +message GetJobMasterIdPOptions {} +message GetJobMasterIdPRequest { + optional grpc.NetAddress masterAddress = 1; + optional GetJobMasterIdPOptions options = 2; +} +message GetJobMasterIdPResponse { + optional int64 masterId = 1; +} + +enum JobMasterMetaCommand { + MetaCommand_Unknown = 0; + MetaCommand_Nothing = 1; + MetaCommand_Register = 2; // Ask the standby master to re-register. +} + +message RegisterJobMasterPOptions { + optional int64 startTimeMs = 2; + optional int64 losePrimacyTimeMs = 3; + optional grpc.BuildVersion version = 4; +} + +message RegisterJobMasterPRequest { + optional int64 jobMasterId = 1; + optional RegisterJobMasterPOptions options = 2; +} +message RegisterJobMasterPResponse {} + +message JobMasterHeartbeatPOptions { +} +message JobMasterHeartbeatPRequest { + optional int64 masterId = 1; + optional JobMasterHeartbeatPOptions options = 2; +} +message JobMasterHeartbeatPResponse { + optional JobMasterMetaCommand command = 1; +} + +/** + * This interface contains meta master service endpoints for Alluxio standby masters. + */ +service JobMasterMasterService { + + /** + * Returns a master id for the given master address. + */ + rpc GetMasterId(GetJobMasterIdPRequest) returns (GetJobMasterIdPResponse); + + /** + * Registers a master. 
+ */ + rpc RegisterMaster(RegisterJobMasterPRequest) returns (RegisterJobMasterPResponse); + + /** + * Heartbeats to indicate the master is lost or not. + */ + rpc MasterHeartbeat(JobMasterHeartbeatPRequest) returns (JobMasterHeartbeatPResponse); +} + diff --git a/core/transport/src/main/proto/grpc/version.proto b/core/transport/src/main/proto/grpc/version.proto index 9a8104b95ac2..de22bfe008b3 100644 --- a/core/transport/src/main/proto/grpc/version.proto +++ b/core/transport/src/main/proto/grpc/version.proto @@ -26,6 +26,7 @@ enum ServiceType { METRICS_MASTER_CLIENT_SERVICE = 9; JOB_MASTER_CLIENT_SERVICE = 10; JOB_MASTER_WORKER_SERVICE = 11; + JOB_MASTER_MASTER_SERVICE = 19; JOURNAL_MASTER_CLIENT_SERVICE = 13; TABLE_MASTER_CLIENT_SERVICE = 14; META_MASTER_BACKUP_MESSAGING_SERVICE = 15; diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index e256cf1d00c3..312a4ed0844e 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -4451,6 +4451,22 @@ "integer": 3 } ] + }, + { + "name": "JobMasterMetaCommand", + "enum_fields": [ + { + "name": "MetaCommand_Unknown" + }, + { + "name": "MetaCommand_Nothing", + "integer": 1 + }, + { + "name": "MetaCommand_Register", + "integer": 2 + } + ] } ], "messages": [ @@ -4723,6 +4739,11 @@ "id": 7, "name": "unfinishedTasks", "type": "int32" + }, + { + "id": 8, + "name": "version", + "type": "grpc.BuildVersion" } ] }, @@ -5004,6 +5025,55 @@ } ] }, + { + "name": "JobMasterStatus", + "fields": [ + { + "id": 1, + "name": "state", + "type": "string" + }, + { + "id": 2, + "name": "masterAddress", + "type": "grpc.NetAddress" + }, + { + "id": 3, + "name": "startTime", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "GetAllMasterStatusPOptions" + }, + { + "name": "GetAllMasterStatusPRequest", + "fields": [ + { + "id": 1, + "name": "options", + "type": "GetAllMasterStatusPOptions" + } + 
] + }, + { + "name": "GetAllMasterStatusPResponse", + "fields": [ + { + "id": 1, + "name": "jobMasterStatus", + "type": "JobMasterStatus", + "is_repeated": true + } + ] + }, { "name": "SubmitOptions" }, @@ -5184,6 +5254,11 @@ "id": 2, "name": "options", "type": "RegisterJobWorkerPOptions" + }, + { + "id": 3, + "name": "version", + "type": "grpc.BuildVersion" } ] }, @@ -5196,6 +5271,100 @@ "type": "int64" } ] + }, + { + "name": "GetJobMasterIdPOptions" + }, + { + "name": "GetJobMasterIdPRequest", + "fields": [ + { + "id": 1, + "name": "masterAddress", + "type": "grpc.NetAddress" + }, + { + "id": 2, + "name": "options", + "type": "GetJobMasterIdPOptions" + } + ] + }, + { + "name": "GetJobMasterIdPResponse", + "fields": [ + { + "id": 1, + "name": "masterId", + "type": "int64" + } + ] + }, + { + "name": "RegisterJobMasterPOptions", + "fields": [ + { + "id": 2, + "name": "startTimeMs", + "type": "int64" + }, + { + "id": 3, + "name": "losePrimacyTimeMs", + "type": "int64" + }, + { + "id": 4, + "name": "version", + "type": "grpc.BuildVersion" + } + ] + }, + { + "name": "RegisterJobMasterPRequest", + "fields": [ + { + "id": 1, + "name": "jobMasterId", + "type": "int64" + }, + { + "id": 2, + "name": "options", + "type": "RegisterJobMasterPOptions" + } + ] + }, + { + "name": "RegisterJobMasterPResponse" + }, + { + "name": "JobMasterHeartbeatPOptions" + }, + { + "name": "JobMasterHeartbeatPRequest", + "fields": [ + { + "id": 1, + "name": "masterId", + "type": "int64" + }, + { + "id": 2, + "name": "options", + "type": "JobMasterHeartbeatPOptions" + } + ] + }, + { + "name": "JobMasterHeartbeatPResponse", + "fields": [ + { + "id": 1, + "name": "command", + "type": "JobMasterMetaCommand" + } + ] } ], "services": [ @@ -5237,6 +5406,11 @@ "in_type": "GetAllWorkerHealthPRequest", "out_type": "GetAllWorkerHealthPResponse" }, + { + "name": "GetAllMasterStatus", + "in_type": "GetAllMasterStatusPRequest", + "out_type": "GetAllMasterStatusPResponse" + }, { "name": "Submit", "in_type": 
"SubmitRequest", @@ -5268,6 +5442,26 @@ "out_type": "RegisterJobWorkerPResponse" } ] + }, + { + "name": "JobMasterMasterService", + "rpcs": [ + { + "name": "GetMasterId", + "in_type": "GetJobMasterIdPRequest", + "out_type": "GetJobMasterIdPResponse" + }, + { + "name": "RegisterMaster", + "in_type": "RegisterJobMasterPRequest", + "out_type": "RegisterJobMasterPResponse" + }, + { + "name": "MasterHeartbeat", + "in_type": "JobMasterHeartbeatPRequest", + "out_type": "JobMasterHeartbeatPResponse" + } + ] } ], "imports": [ @@ -8492,6 +8686,10 @@ "name": "JOB_MASTER_WORKER_SERVICE", "integer": 11 }, + { + "name": "JOB_MASTER_MASTER_SERVICE", + "integer": 19 + }, { "name": "JOURNAL_MASTER_CLIENT_SERVICE", "integer": 13 diff --git a/job/client/src/main/java/alluxio/client/job/JobMasterClient.java b/job/client/src/main/java/alluxio/client/job/JobMasterClient.java index 0ce80ec8391c..47725661cc80 100644 --- a/job/client/src/main/java/alluxio/client/job/JobMasterClient.java +++ b/job/client/src/main/java/alluxio/client/job/JobMasterClient.java @@ -12,6 +12,7 @@ package alluxio.client.job; import alluxio.Client; +import alluxio.grpc.JobMasterStatus; import alluxio.grpc.ListAllPOptions; import alluxio.job.CmdConfig; import alluxio.job.JobConfig; @@ -112,6 +113,13 @@ default List list() throws IOException { */ long submit(CmdConfig cmdConfig) throws IOException; + /** + * Gets the status of all job masters in the cluster. + * + * @return a list of status + */ + List getAllMasterStatus() throws IOException; + /** * Gets the status of the given command jobControlId. 
* diff --git a/job/client/src/main/java/alluxio/client/job/RetryHandlingJobMasterClient.java b/job/client/src/main/java/alluxio/client/job/RetryHandlingJobMasterClient.java index ca8ae2c8f022..9dd51ba6cada 100644 --- a/job/client/src/main/java/alluxio/client/job/RetryHandlingJobMasterClient.java +++ b/job/client/src/main/java/alluxio/client/job/RetryHandlingJobMasterClient.java @@ -14,6 +14,7 @@ import alluxio.AbstractJobMasterClient; import alluxio.Constants; import alluxio.grpc.CancelPRequest; +import alluxio.grpc.GetAllMasterStatusPRequest; import alluxio.grpc.GetAllWorkerHealthPRequest; import alluxio.grpc.GetCmdStatusDetailedRequest; import alluxio.grpc.GetCmdStatusRequest; @@ -21,6 +22,7 @@ import alluxio.grpc.GetJobStatusDetailedPRequest; import alluxio.grpc.GetJobStatusPRequest; import alluxio.grpc.JobMasterClientServiceGrpc; +import alluxio.grpc.JobMasterStatus; import alluxio.grpc.ListAllPOptions; import alluxio.grpc.ListAllPRequest; import alluxio.grpc.RunPRequest; @@ -164,6 +166,16 @@ public List getAllWorkerHealth() throws IOException { }, RPC_LOG, "GetAllWorkerHealth", ""); } + @Override + public List getAllMasterStatus() throws IOException { + return retryRPC(() -> { + List masterStatusList = + mClient.getAllMasterStatus(GetAllMasterStatusPRequest.newBuilder().build()) + .getJobMasterStatusList(); + return masterStatusList; + }, RPC_LOG, "GetAllMasterStatus", ""); + } + @Override public Status getCmdStatus(long id) throws IOException { return ProtoUtils.fromProto(retryRPC(() -> mClient.getCmdStatus( diff --git a/job/common/src/main/java/alluxio/job/MasterWorkerInfo.java b/job/common/src/main/java/alluxio/job/MasterWorkerInfo.java index 80df3e1aa6f6..2a654c0e1eae 100644 --- a/job/common/src/main/java/alluxio/job/MasterWorkerInfo.java +++ b/job/common/src/main/java/alluxio/job/MasterWorkerInfo.java @@ -12,6 +12,8 @@ package alluxio.job; import alluxio.Constants; +import alluxio.RuntimeConstants; +import alluxio.grpc.BuildVersion; import 
alluxio.util.CommonUtils; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -29,6 +31,8 @@ public final class MasterWorkerInfo { private final WorkerNetAddress mWorkerAddress; /** The id of the worker. */ private final long mId; + + private final BuildVersion mVersion; /** Start time of the worker in ms. */ private final long mStartTimeMs; /** Worker's last updated time in ms. */ @@ -45,6 +49,22 @@ public MasterWorkerInfo(long id, WorkerNetAddress address) { mId = id; mStartTimeMs = System.currentTimeMillis(); mLastUpdatedTimeMs = System.currentTimeMillis(); + mVersion = RuntimeConstants.UNKNOWN_VERSION_INFO; + } + + /** + * Creates a new instance of {@link MasterWorkerInfo}. + * + * @param id the worker id to use + * @param address the worker address to use + * @param version the worker version info + */ + public MasterWorkerInfo(long id, WorkerNetAddress address, BuildVersion version) { + mWorkerAddress = Preconditions.checkNotNull(address); + mId = id; + mStartTimeMs = System.currentTimeMillis(); + mLastUpdatedTimeMs = System.currentTimeMillis(); + mVersion = version; } /** @@ -75,6 +95,13 @@ public synchronized long getStartTime() { return mStartTimeMs; } + /** + * @return the build version + */ + public synchronized BuildVersion getVersion() { + return mVersion; + } + /** * @return generated {@link WorkerInfo} for this worker */ diff --git a/job/common/src/main/java/alluxio/job/wire/JobWorkerHealth.java b/job/common/src/main/java/alluxio/job/wire/JobWorkerHealth.java index f2d81d136c0a..f07b71ba0526 100644 --- a/job/common/src/main/java/alluxio/job/wire/JobWorkerHealth.java +++ b/job/common/src/main/java/alluxio/job/wire/JobWorkerHealth.java @@ -11,6 +11,8 @@ package alluxio.job.wire; +import alluxio.RuntimeConstants; +import alluxio.grpc.BuildVersion; import alluxio.util.CommonUtils; import com.google.common.base.MoreObjects; @@ -23,8 +25,8 @@ * The job worker health information. 
*/ public class JobWorkerHealth { - private final long mWorkerId; + private BuildVersion mVersion = RuntimeConstants.UNKNOWN_VERSION_INFO; private final List mLoadAverage; private final int mUnfinishedTasks; private final long mLastUpdated; @@ -44,6 +46,24 @@ public class JobWorkerHealth { */ public JobWorkerHealth(long workerId, List loadAverage, int taskPoolSize, int numActiveTasks, int unfinishedTasks, String hostname) { + this(workerId, loadAverage, taskPoolSize, numActiveTasks, unfinishedTasks, hostname, + RuntimeConstants.CURRENT_VERSION_INFO); + } + + /** + * Default constructor. + * + * @param workerId the worker id + * @param loadAverage output of CentralProcessor.getSystemLoadAverage on the worker + * @param taskPoolSize task pool size + * @param numActiveTasks number of active tasks in the worker + * @param unfinishedTasks number of unfinished tasks that the worker has + * @param hostname hostname of the worker + * @param version the worker's version info + */ + public JobWorkerHealth(long workerId, List loadAverage, int taskPoolSize, + int numActiveTasks, int unfinishedTasks, String hostname, + BuildVersion version) { mWorkerId = workerId; mLoadAverage = loadAverage; mUnfinishedTasks = unfinishedTasks; @@ -51,6 +71,7 @@ public JobWorkerHealth(long workerId, List loadAverage, int taskPoolSize mTaskPoolSize = taskPoolSize; mNumActiveTasks = numActiveTasks; mHostname = hostname; + mVersion = version; } /** @@ -66,6 +87,9 @@ public JobWorkerHealth(alluxio.grpc.JobWorkerHealth jobWorkerHealth) { mTaskPoolSize = jobWorkerHealth.getTaskPoolSize(); mNumActiveTasks = jobWorkerHealth.getNumActiveTasks(); mHostname = jobWorkerHealth.getHostname(); + if (jobWorkerHealth.hasVersion()) { + mVersion = jobWorkerHealth.getVersion(); + } } /** @@ -114,6 +138,13 @@ public String getHostname() { return mHostname; } + /** + * @return the worker version info + */ + public BuildVersion getVersion() { + return mVersion; + } + /** * @return proto representation of JobWorkerInfo 
*/ @@ -121,7 +152,7 @@ public alluxio.grpc.JobWorkerHealth toProto() { alluxio.grpc.JobWorkerHealth.Builder builder = alluxio.grpc.JobWorkerHealth.newBuilder() .setWorkerId(mWorkerId).addAllLoadAverage(mLoadAverage).setUnfinishedTasks(mUnfinishedTasks) .setTaskPoolSize(mTaskPoolSize).setNumActiveTasks(mNumActiveTasks) - .setLastUpdated(mLastUpdated).setHostname(mHostname); + .setLastUpdated(mLastUpdated).setHostname(mHostname).setVersion(mVersion); return builder.build(); } @@ -149,7 +180,8 @@ public boolean equals(Object o) { && Objects.equal(mLastUpdated, that.mLastUpdated) && Objects.equal(mHostname, that.mHostname) && Objects.equal(mTaskPoolSize, that.mTaskPoolSize) - && Objects.equal(mNumActiveTasks, that.mNumActiveTasks); + && Objects.equal(mNumActiveTasks, that.mNumActiveTasks) + && Objects.equal(mVersion, that.mVersion); } @Override @@ -161,6 +193,8 @@ public String toString() { .add("hostname", mHostname) .add("taskPoolSize", mTaskPoolSize) .add("numActiveTasks", mNumActiveTasks) + .add("version", mVersion.getVersion()) + .add("revision", mVersion.getRevision()) .toString(); } } diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index bc2782e01bfb..8dc21170b26b 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -11,7 +11,9 @@ package alluxio.master.job; +import alluxio.ClientContext; import alluxio.Constants; +import alluxio.RuntimeConstants; import alluxio.client.file.FileSystem; import alluxio.client.file.FileSystemContext; import alluxio.clock.SystemClock; @@ -22,11 +24,17 @@ import alluxio.exception.AccessControlException; import alluxio.exception.ExceptionMessage; import alluxio.exception.JobDoesNotExistException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.ResourceExhaustedException; +import alluxio.grpc.BuildVersion; import 
alluxio.grpc.GrpcService; import alluxio.grpc.JobCommand; +import alluxio.grpc.JobMasterHeartbeatPOptions; +import alluxio.grpc.JobMasterMetaCommand; +import alluxio.grpc.JobMasterStatus; import alluxio.grpc.ListAllPOptions; import alluxio.grpc.RegisterCommand; +import alluxio.grpc.RegisterJobMasterPOptions; import alluxio.grpc.ServiceType; import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; @@ -56,6 +64,10 @@ import alluxio.master.job.tracker.CmdJobTracker; import alluxio.master.job.workflow.WorkflowTracker; import alluxio.master.journal.NoopJournaled; +import alluxio.master.meta.JobMasterInfo; +import alluxio.master.meta.JobMasterMasterServiceHandler; +import alluxio.master.meta.JobMasterSync; +import alluxio.master.meta.RetryHandlingJobMasterMasterClient; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; import alluxio.resource.LockResource; @@ -64,10 +76,16 @@ import alluxio.security.authentication.ClientContextServerInjector; import alluxio.underfs.UfsManager; import alluxio.util.CommonUtils; +import alluxio.util.ConfigurationUtils; +import alluxio.util.IdUtils; import alluxio.util.executor.ExecutorServiceFactories; +import alluxio.util.network.NetworkAddressUtils; +import alluxio.wire.Address; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; +import alluxio.worker.job.JobMasterClientContext; +import com.codahale.metrics.Gauge; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import io.grpc.Context; @@ -76,6 +94,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.net.InetSocketAddress; +import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -105,6 +125,27 @@ public class JobMaster extends AbstractMaster implements NoopJournaled { private final IndexDefinition mAddressIndex = IndexDefinition.ofUnique(MasterWorkerInfo::getWorkerAddress); + // Job master 
metadata management. + private static final IndexDefinition ID_INDEX = + IndexDefinition.ofUnique(JobMasterInfo::getId); + private static final IndexDefinition ADDRESS_INDEX = + IndexDefinition.ofUnique(JobMasterInfo::getAddress); + + /** Keeps track of standby job masters which are in communication with the primary. */ + private final IndexedSet mJobMasters = + new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + /** Keeps track of standby job masters which are no longer in communication with the primary. */ + private final IndexedSet mLostJobMasters = + new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + + /** The connect address for the rpc server. */ + private final InetSocketAddress mRpcConnectAddress = + NetworkAddressUtils.getConnectAddress(NetworkAddressUtils.ServiceType.JOB_MASTER_RPC, + Configuration.global()); + private final int mPort; + /** The address of this master. */ + private final Address mJobMasterAddress; + /** * The Filesystem context that the job master uses for its client. */ @@ -155,6 +196,12 @@ public JobMaster(MasterContext masterContext, FileSystem filesystem, mJobIdGenerator = new JobIdGenerator(); mWorkflowTracker = new WorkflowTracker(this); + mPort = NetworkAddressUtils.getPort(NetworkAddressUtils.ServiceType.JOB_MASTER_RPC, + Configuration.global()); + mJobMasterAddress = new Address().setHost(NetworkAddressUtils + .getConnectHost(NetworkAddressUtils.ServiceType.JOB_MASTER_RPC, Configuration.global())) + .setRpcPort(mPort); + mPlanTracker = new PlanTracker( Configuration.getLong(PropertyKey.JOB_MASTER_JOB_CAPACITY), Configuration.getMs(PropertyKey.JOB_MASTER_FINISHED_JOB_RETENTION_TIME), @@ -184,12 +231,14 @@ public long getNewJobId() { @Override public void start(Boolean isLeader) throws IOException { + LOG.info("Job master starting with state {}", isLeader ? 
"primary" : "standby"); super.start(isLeader); // Start serving metrics system, this will not block MetricsSystem.startSinks(Configuration.getString(PropertyKey.METRICS_CONF_FILE)); // Fail any jobs that were still running when the last job master stopped. + LOG.info("Updating job statuses"); for (PlanCoordinator planCoordinator : mPlanTracker.coordinators()) { if (!planCoordinator.isJobFinished()) { planCoordinator.setJobAsFailed("JobMasterShutdown", @@ -197,12 +246,19 @@ public void start(Boolean isLeader) throws IOException { } } if (isLeader) { + LOG.info("Starting job master as primary"); getExecutorService() .submit(new HeartbeatThread(HeartbeatContext.JOB_MASTER_LOST_WORKER_DETECTION, new LostWorkerDetectionHeartbeatExecutor(), () -> new FixedIntervalSupplier( Configuration.getMs(PropertyKey.JOB_MASTER_LOST_WORKER_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); + getExecutorService() + .submit(new HeartbeatThread(HeartbeatContext.JOB_MASTER_LOST_MASTER_DETECTION, + new LostMasterDetectionHeartbeatExecutor(), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.JOB_MASTER_LOST_MASTER_INTERVAL)), + Configuration.global(), mMasterContext.getUserState())); if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("JOB_MASTER_AUDIT_LOG"); mAsyncAuditLogWriter.start(); @@ -211,6 +267,21 @@ public void start(Boolean isLeader) throws IOException { () -> mAsyncAuditLogWriter != null ? 
mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); + } + } else { + LOG.info("Starting job master as standby"); + if (ConfigurationUtils.isHaMode(Configuration.global())) { + // Standby master should set up JobMasterSync to communicate with the leader master + RetryHandlingJobMasterMasterClient jobMasterClient = + new RetryHandlingJobMasterMasterClient(JobMasterClientContext + .newBuilder(ClientContext.create(Configuration.global())).build()); + getExecutorService().submit(new HeartbeatThread(HeartbeatContext.JOB_MASTER_SYNC, + new JobMasterSync(mJobMasterAddress, jobMasterClient), + () -> new FixedIntervalSupplier( + Configuration.getMs(PropertyKey.JOB_MASTER_MASTER_HEARTBEAT_INTERVAL)), + Configuration.global(), mMasterContext.getUserState())); + LOG.info("Standby job master with address {} starts sending heartbeat to the primary.", + mJobMasterAddress); + } } } @@ -232,6 +303,8 @@ public Map getServices() { new ClientContextServerInjector()))); services.put(ServiceType.JOB_MASTER_WORKER_SERVICE, new GrpcService(new JobMasterWorkerServiceHandler(this))); + services.put(ServiceType.JOB_MASTER_MASTER_SERVICE, + new GrpcService(new JobMasterMasterServiceHandler(this))); return services; } @@ -538,13 +611,55 @@ public List getAllWorkerHealth() { } } + /** + * Lists the status of all job masters in the cluster. 
+ * + * @return the list + */ + public List getAllJobMasterStatus() { + try (JobMasterAuditContext auditContext = + createAuditContext("getAllMasterStatus")) { + ArrayList result = new ArrayList<>(); + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + Gauge startTimeGauge = gauges.get(MetricKey.MASTER_START_TIME.getName()); + JobMasterStatus primaryStatus = JobMasterStatus.newBuilder() + .setMasterAddress(mJobMasterAddress.toProto()) + .setState("PRIMARY") + .setStartTime((long) startTimeGauge.getValue()) + .setVersion(RuntimeConstants.CURRENT_VERSION_INFO).build(); + result.add(primaryStatus); + + for (JobMasterInfo standbyJobMaster : mJobMasters) { + JobMasterStatus status = JobMasterStatus.newBuilder() + .setMasterAddress(standbyJobMaster.getAddress().toProto()) + .setState("STANDBY") + .setStartTime(standbyJobMaster.getStartTimeMs()) + .setVersion(standbyJobMaster.getVersion()) + .build(); + result.add(status); + } + for (JobMasterInfo standbyJobMaster : mLostJobMasters) { + JobMasterStatus status = JobMasterStatus.newBuilder() + .setMasterAddress(standbyJobMaster.getAddress().toProto()) + .setState("LOST") + .setStartTime(standbyJobMaster.getStartTimeMs()) + .setVersion(standbyJobMaster.getVersion()) + .build(); + result.add(status); + } + auditContext.setSucceeded(true); + return result; + } + } + /** * Returns a worker id for the given worker. 
* * @param workerNetAddress the worker {@link WorkerNetAddress} + * @param version the version info of the job worker * @return the worker id for this worker */ - public long registerWorker(WorkerNetAddress workerNetAddress) { + public long registerWorker(WorkerNetAddress workerNetAddress, BuildVersion version) { // Run under exclusive lock for mWorkers try (LockResource workersLockExclusive = new LockResource(mWorkerRWLock.writeLock())) { // Check if worker has already been registered with this job master @@ -564,8 +679,9 @@ public long registerWorker(WorkerNetAddress workerNetAddress) { } // Generate a new worker id. long workerId = mNextWorkerId.getAndIncrement(); - mWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress)); - LOG.info("registerWorker(): WorkerNetAddress: {} id: {}", workerNetAddress, workerId); + mWorkers.add(new MasterWorkerInfo(workerId, workerNetAddress, version)); + LOG.info("registerWorker(): WorkerNetAddress: {} id: {} version-revision: {}-{}", + workerNetAddress, workerId, version.getVersion(), version.getRevision()); return workerId; } } @@ -641,6 +757,95 @@ public List workerHeartbeat(JobWorkerHealth jobWorkerHealth, return mCommandManager.pollAllPendingCommands(workerId); } + /** + * Handles a heartbeat from a standby job master. + * + * @param masterId the job master id allocated by the primary job master + * @param options extra options + * @return the command to the standby job master + */ + public JobMasterMetaCommand jobMasterHeartbeat( + long masterId, JobMasterHeartbeatPOptions options) { + JobMasterInfo master = mJobMasters.getFirstByField(ID_INDEX, masterId); + if (master == null) { + LOG.warn("Could not find master id: {} for heartbeat. Instructed to register", masterId); + return JobMasterMetaCommand.MetaCommand_Register; + } + + master.updateLastUpdatedTimeMs(); + return JobMasterMetaCommand.MetaCommand_Nothing; + } + + /** + * Handles the register request from a standby job master. 
+ * + * @param masterId the job master id + * @param options extra options + */ + public void jobMasterRegister(long masterId, RegisterJobMasterPOptions options) + throws NotFoundException { + LOG.info("Job master {} attempts to register", masterId); + JobMasterInfo master = mJobMasters.getFirstByField(ID_INDEX, masterId); + if (master == null) { + throw new NotFoundException( + MessageFormat.format("No master with masterId {0,number,#} is found", masterId)); + } + + master.updateLastUpdatedTimeMs(); + if (options.hasStartTimeMs()) { + master.setStartTimeMs(options.getStartTimeMs()); + } + if (options.hasLosePrimacyTimeMs()) { + master.setLosePrimacyTimeMs(options.getLosePrimacyTimeMs()); + } + if (options.hasVersion()) { + master.setVersion(options.getVersion()); + } else { + master.setVersion(RuntimeConstants.UNKNOWN_VERSION_INFO); + } + LOG.info("registerMaster(): master: {}", master); + } + + /** + * Allocates an ID for the job master from the target address. + * + * @param address the address + * @return an allocated id + */ + public long getMasterId(Address address) { + JobMasterInfo existingMaster = mJobMasters.getFirstByField(ADDRESS_INDEX, address); + if (existingMaster != null) { + // This master address is already mapped to a master id. + long oldMasterId = existingMaster.getId(); + LOG.warn("The master {} already exists as id {}.", address, oldMasterId); + return oldMasterId; + } + + JobMasterInfo lostMaster = mLostJobMasters.getFirstByField(ADDRESS_INDEX, address); + if (lostMaster != null) { + // This is one of the lost masters + synchronized (lostMaster) { + final long lostMasterId = lostMaster.getId(); + LOG.warn("A lost master {} has requested its old id {}.", address, lostMasterId); + + // Update the timestamp of the master before it is considered an active master. + lostMaster.updateLastUpdatedTimeMs(); + mJobMasters.add(lostMaster); + mLostJobMasters.remove(lostMaster); + return lostMasterId; + } + } + + // Generate a new master id. 
+ long masterId = IdUtils.getRandomNonNegativeLong(); + while (!mJobMasters.add(new JobMasterInfo(masterId, address))) { + masterId = IdUtils.getRandomNonNegativeLong(); + } + + LOG.info("getMasterId(): MasterAddress: {} id: {}", address, masterId); + return masterId; + } + /** * Creates a {@link JobMasterAuditContext} instance. * @@ -685,6 +890,39 @@ private JobMasterAuditContext createAuditContext(String command) { return auditContext; } + /** + * Lost job master periodic check. + */ + private final class LostMasterDetectionHeartbeatExecutor implements HeartbeatExecutor { + + /** + * Constructs a new {@link LostMasterDetectionHeartbeatExecutor}. + */ + public LostMasterDetectionHeartbeatExecutor() { + } + + @Override + public void heartbeat(long timeout) { + long masterTimeoutMs = Configuration.getMs(PropertyKey.JOB_MASTER_MASTER_TIMEOUT); + for (JobMasterInfo master : mJobMasters) { + synchronized (master) { + final long lastUpdate = mClock.millis() - master.getLastUpdatedTimeMs(); + if (lastUpdate > masterTimeoutMs) { + LOG.error("A standby job master {}({}) timed out after {}ms without a heartbeat!", + master.getId(), master.getAddress(), lastUpdate); + mLostJobMasters.add(master); + mJobMasters.remove(master); + } + } + } + } + + @Override + public void close() { + // Nothing to clean up + } + } + /** * Lost worker periodic check. 
*/ diff --git a/job/server/src/main/java/alluxio/master/job/JobMasterClientServiceHandler.java b/job/server/src/main/java/alluxio/master/job/JobMasterClientServiceHandler.java index 06ffdc74587d..50a79b4157b5 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMasterClientServiceHandler.java +++ b/job/server/src/main/java/alluxio/master/job/JobMasterClientServiceHandler.java @@ -15,6 +15,8 @@ import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.CancelPRequest; import alluxio.grpc.CancelPResponse; +import alluxio.grpc.GetAllMasterStatusPRequest; +import alluxio.grpc.GetAllMasterStatusPResponse; import alluxio.grpc.GetAllWorkerHealthPRequest; import alluxio.grpc.GetAllWorkerHealthPResponse; import alluxio.grpc.GetCmdStatusDetailedRequest; @@ -28,6 +30,7 @@ import alluxio.grpc.GetJobStatusPRequest; import alluxio.grpc.GetJobStatusPResponse; import alluxio.grpc.JobMasterClientServiceGrpc; +import alluxio.grpc.JobMasterStatus; import alluxio.grpc.ListAllPRequest; import alluxio.grpc.ListAllPResponse; import alluxio.grpc.RunPRequest; @@ -143,6 +146,22 @@ public void getAllWorkerHealth(GetAllWorkerHealthPRequest request, }, "getAllWorkerHealth", "request=%s", responseObserver, request); } + @Override + public void getAllMasterStatus(GetAllMasterStatusPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + GetAllMasterStatusPResponse.Builder builder = GetAllMasterStatusPResponse.newBuilder(); + + List masterStatuses = mJobMaster.getAllJobMasterStatus(); + + for (JobMasterStatus masterStatus : masterStatuses) { + builder.addJobMasterStatus(masterStatus); + } + + return builder.build(); + }, "getAllMasterStatus", "request=%s", responseObserver, request); + } + @Override public void submit(SubmitRequest request, StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { diff --git a/job/server/src/main/java/alluxio/master/job/JobMasterWorkerServiceHandler.java 
b/job/server/src/main/java/alluxio/master/job/JobMasterWorkerServiceHandler.java index 5ab4c9dde9e0..26914aa1953b 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMasterWorkerServiceHandler.java +++ b/job/server/src/main/java/alluxio/master/job/JobMasterWorkerServiceHandler.java @@ -12,6 +12,8 @@ package alluxio.master.job; import alluxio.RpcUtils; +import alluxio.RuntimeConstants; +import alluxio.grpc.BuildVersion; import alluxio.grpc.GrpcUtils; import alluxio.grpc.JobHeartbeatPRequest; import alluxio.grpc.JobHeartbeatPResponse; @@ -72,9 +74,12 @@ public void heartbeat(JobHeartbeatPRequest request, @Override public void registerJobWorker(RegisterJobWorkerPRequest request, StreamObserver responseObserver) { - + LOG.info("Received job worker {}", request); + BuildVersion version = request.hasVersion() ? request.getVersion() + : RuntimeConstants.UNKNOWN_VERSION_INFO; RpcUtils.call(LOG, () -> RegisterJobWorkerPResponse.newBuilder() - .setId(mJobMaster.registerWorker(GrpcUtils.fromProto(request.getWorkerNetAddress()))) + .setId(mJobMaster.registerWorker( + GrpcUtils.fromProto(request.getWorkerNetAddress()), version)) .build(), "registerJobWorker", "request=%s", responseObserver, request); } } diff --git a/job/server/src/main/java/alluxio/master/meta/JobMasterInfo.java b/job/server/src/main/java/alluxio/master/meta/JobMasterInfo.java new file mode 100644 index 000000000000..52fe93c61217 --- /dev/null +++ b/job/server/src/main/java/alluxio/master/meta/JobMasterInfo.java @@ -0,0 +1,132 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.RuntimeConstants; +import alluxio.grpc.BuildVersion; +import alluxio.wire.Address; + +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Job master information. + */ +@NotThreadSafe +public final class JobMasterInfo { + /** Master's address. */ + private final Address mAddress; + /** The id of the master. */ + private final long mId; + /** Master's last updated time in ms. */ + private long mLastUpdatedTimeMs; + /** Master's start time in ms. */ + private long mStartTimeMs = 0; + /** Master's last lose primacy time in ms. */ + private long mLosePrimacyTimeMs = 0; + /** Master's version. */ + private BuildVersion mVersion = RuntimeConstants.UNKNOWN_VERSION_INFO; + + /** + * Creates a new instance of {@link JobMasterInfo}. + * + * @param id the master id to use + * @param address the master address to use + */ + public JobMasterInfo(long id, Address address) { + mAddress = Preconditions.checkNotNull(address, "address"); + mId = id; + mLastUpdatedTimeMs = System.currentTimeMillis(); + } + + /** + * @return the master's address + */ + public Address getAddress() { + return mAddress; + } + + /** + * @return the id of the master + */ + public long getId() { + return mId; + } + + /** + * @return the last updated time of the master in ms + */ + public long getLastUpdatedTimeMs() { + return mLastUpdatedTimeMs; + } + + /** + * @return the start time of the master in ms + */ + public long getStartTimeMs() { + return mStartTimeMs; + } + + /** + * @return the last lose primacy time of the master in ms + */ + public long getLosePrimacyTimeMs() { + return mLosePrimacyTimeMs; + } + + /** + * @return the version of the master + */ + public BuildVersion getVersion() { + return mVersion; + } + + @Override + public String toString() 
{ + return MoreObjects.toStringHelper(this).add("id", mId).add("address", mAddress) + .add("lastUpdatedTimeMs", mLastUpdatedTimeMs).add("startTimeMs", mStartTimeMs) + .add("losePrimacyTimeMs", mLosePrimacyTimeMs) + .add("version", mVersion.getVersion()) + .add("revision", mVersion.getRevision()).toString(); + } + + /** + * @param startTimeMs the start time of the master in ms + */ + public void setStartTimeMs(long startTimeMs) { + mStartTimeMs = startTimeMs; + } + + /** + * @param losePrimacyTimeMs the last primacy state change time of the master in ms + */ + public void setLosePrimacyTimeMs(long losePrimacyTimeMs) { + mLosePrimacyTimeMs = losePrimacyTimeMs; + } + + /** + * @param version the version of the master + */ + public void setVersion(BuildVersion version) { + mVersion = version; + } + + /** + * Updates the last updated time of the master in ms. + */ + public void updateLastUpdatedTimeMs() { + mLastUpdatedTimeMs = System.currentTimeMillis(); + } +} + diff --git a/job/server/src/main/java/alluxio/master/meta/JobMasterMasterServiceHandler.java b/job/server/src/main/java/alluxio/master/meta/JobMasterMasterServiceHandler.java new file mode 100644 index 000000000000..f5ce56eca5c3 --- /dev/null +++ b/job/server/src/main/java/alluxio/master/meta/JobMasterMasterServiceHandler.java @@ -0,0 +1,77 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.meta; + +import alluxio.RpcUtils; +import alluxio.grpc.GetJobMasterIdPRequest; +import alluxio.grpc.GetJobMasterIdPResponse; +import alluxio.grpc.JobMasterHeartbeatPRequest; +import alluxio.grpc.JobMasterHeartbeatPResponse; +import alluxio.grpc.JobMasterMasterServiceGrpc; +import alluxio.grpc.NetAddress; +import alluxio.grpc.RegisterJobMasterPRequest; +import alluxio.grpc.RegisterJobMasterPResponse; +import alluxio.master.job.JobMaster; +import alluxio.wire.Address; + +import io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; + +/** + * This class is a gRPC handler for the primary job master to answer RPC from standby job masters. + */ +@NotThreadSafe +public final class JobMasterMasterServiceHandler + extends JobMasterMasterServiceGrpc.JobMasterMasterServiceImplBase { + private static final Logger LOG = LoggerFactory.getLogger(JobMasterMasterServiceHandler.class); + + private final JobMaster mJobMaster; + + /** + * Creates a new instance of {@link JobMasterMasterServiceHandler}. 
+ * + * @param jobMaster the job master from the primary job master process + */ + public JobMasterMasterServiceHandler(JobMaster jobMaster) { + LOG.info("Started to serve standby job master requests"); + mJobMaster = jobMaster; + } + + @Override + public void getMasterId(GetJobMasterIdPRequest request, + StreamObserver responseObserver) { + NetAddress masterAddress = request.getMasterAddress(); + RpcUtils.call(LOG, () -> GetJobMasterIdPResponse.newBuilder() + .setMasterId(mJobMaster.getMasterId(Address.fromProto(masterAddress))).build(), + "GetJobMasterId", "request=%s", responseObserver, request); + } + + @Override + public void registerMaster(RegisterJobMasterPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + mJobMaster.jobMasterRegister(request.getJobMasterId(), request.getOptions()); + return RegisterJobMasterPResponse.getDefaultInstance(); + }, "RegisterJobMaster", "request=%s", responseObserver, request); + } + + @Override + public void masterHeartbeat(JobMasterHeartbeatPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> JobMasterHeartbeatPResponse.newBuilder().setCommand( + mJobMaster.jobMasterHeartbeat(request.getMasterId(), request.getOptions())).build(), + "JobMasterHeartbeat", "request=%s", responseObserver, request); + } +} diff --git a/job/server/src/main/java/alluxio/master/meta/JobMasterSync.java b/job/server/src/main/java/alluxio/master/meta/JobMasterSync.java new file mode 100644 index 000000000000..a241c89b227b --- /dev/null +++ b/job/server/src/main/java/alluxio/master/meta/JobMasterSync.java @@ -0,0 +1,116 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.meta; + +import alluxio.grpc.JobMasterMetaCommand; +import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.wire.Address; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicReference; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * If a job master is detected as a standby job master, it will set up its JobMasterSync and + * use its {@link RetryHandlingJobMasterMasterClient} to register to the primary job master, + * then maintain a heartbeat with the primary. + */ +@NotThreadSafe +public final class JobMasterSync implements HeartbeatExecutor { + private static final Logger LOG = LoggerFactory.getLogger(JobMasterSync.class); + private static final long UNINITIALIZED_MASTER_ID = -1L; + + /** The address of this standby job master. */ + private final Address mMasterAddress; + + /** Client for communication with the primary master. */ + private final RetryHandlingJobMasterMasterClient mMasterClient; + + /** The ID of this standby master. */ + private final AtomicReference mMasterId = new AtomicReference<>(UNINITIALIZED_MASTER_ID); + + /** + * Creates a new instance of {@link JobMasterSync}. + * + * @param masterAddress the master address + * @param masterClient the job master client + */ + public JobMasterSync(Address masterAddress, RetryHandlingJobMasterMasterClient masterClient) { + mMasterAddress = masterAddress; + mMasterClient = masterClient; + } + + /** + * Heartbeats to the leader master node. 
+ */ + @Override + public void heartbeat(long timeout) { + JobMasterMetaCommand command = null; + try { + if (mMasterId.get() == UNINITIALIZED_MASTER_ID) { + setIdAndRegister(); + } + command = mMasterClient.heartbeat(mMasterId.get()); + handleCommand(command); + } catch (IOException e) { + // An error occurred, log and ignore it or error if heartbeat timeout is reached + if (command == null) { + LOG.error("Failed to receive primary master heartbeat command.", e); + } else { + LOG.error("Failed to execute primary master heartbeat command: {}", command, e); + } + mMasterClient.disconnect(); + } + } + + /** + * Handles a leader master command. + * + * @param cmd the command to execute + */ + private void handleCommand(JobMasterMetaCommand cmd) throws IOException { + if (cmd == null) { + return; + } + switch (cmd) { + case MetaCommand_Nothing: + break; + // Primary master requests re-registration + case MetaCommand_Register: + setIdAndRegister(); + break; + // Unknown request + case MetaCommand_Unknown: + LOG.error("Master heartbeat sends unknown command {}", cmd); + break; + default: + throw new RuntimeException("Un-recognized command from primary master " + cmd); + } + } + + /** + * Sets the master id and registers with the Alluxio leader master. 
+ */ + private void setIdAndRegister() throws IOException { + LOG.info("Prepare to register to primary job master"); + mMasterId.set(mMasterClient.getId(mMasterAddress)); + LOG.info("Received job master ID {}", mMasterId.get()); + mMasterClient.register(mMasterId.get()); + LOG.info("Registered with primary job master"); + } + + @Override + public void close() {} +} diff --git a/job/server/src/main/java/alluxio/master/meta/RetryHandlingJobMasterMasterClient.java b/job/server/src/main/java/alluxio/master/meta/RetryHandlingJobMasterMasterClient.java new file mode 100644 index 000000000000..540cee90d760 --- /dev/null +++ b/job/server/src/main/java/alluxio/master/meta/RetryHandlingJobMasterMasterClient.java @@ -0,0 +1,136 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.meta; + +import alluxio.AbstractJobMasterClient; +import alluxio.Constants; +import alluxio.RuntimeConstants; +import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.GetJobMasterIdPRequest; +import alluxio.grpc.JobMasterHeartbeatPOptions; +import alluxio.grpc.JobMasterHeartbeatPRequest; +import alluxio.grpc.JobMasterMasterServiceGrpc; +import alluxio.grpc.JobMasterMetaCommand; +import alluxio.grpc.RegisterJobMasterPOptions; +import alluxio.grpc.RegisterJobMasterPRequest; +import alluxio.grpc.ServiceType; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.wire.Address; +import alluxio.worker.job.JobMasterClientContext; + +import com.codahale.metrics.Gauge; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Map; +import javax.annotation.concurrent.ThreadSafe; + +/** + * A wrapper for the gRPC client to interact with the primary job master, + * used by Alluxio standby job masters. + */ +@ThreadSafe +public final class RetryHandlingJobMasterMasterClient extends AbstractJobMasterClient { + private static final Logger LOG = + LoggerFactory.getLogger(RetryHandlingJobMasterMasterClient.class); + private JobMasterMasterServiceGrpc.JobMasterMasterServiceBlockingStub mClient = null; + + /** + * Creates an instance of {@link RetryHandlingJobMasterMasterClient}. 
+ * + * @param conf master client configuration + */ + public RetryHandlingJobMasterMasterClient(JobMasterClientContext conf) { + super(conf); + } + + @Override + public void connect() throws AlluxioStatusException { + super.connect(); + } + + @Override + protected ServiceType getRemoteServiceType() { + return ServiceType.JOB_MASTER_MASTER_SERVICE; + } + + @Override + protected String getServiceName() { + return Constants.JOB_MASTER_MASTER_SERVICE_NAME; + } + + @Override + protected long getServiceVersion() { + return Constants.JOB_MASTER_MASTER_SERVICE_VERSION; + } + + @Override + protected void afterConnect() { + mClient = JobMasterMasterServiceGrpc.newBlockingStub(mChannel); + } + + /** + * Returns a master id for a master address. + * + * @param address the address to get a master id for + * @return a master id + */ + public long getId(final Address address) throws IOException { + return retryRPC(() -> mClient + .getMasterId(GetJobMasterIdPRequest.newBuilder() + .setMasterAddress(address.toProto()).build()) + .getMasterId(), LOG, "GetId", "address=%s", address); + } + + /** + * Sends a heartbeat to the leader master. Standby masters periodically execute this method + * so that the leader master knows they are still running. + * + * @param masterId the master id + * @return whether this master should re-register + */ + public JobMasterMetaCommand heartbeat(final long masterId) throws IOException { + return retryRPC(() -> mClient + .masterHeartbeat(JobMasterHeartbeatPRequest.newBuilder().setMasterId(masterId) + .setOptions(JobMasterHeartbeatPOptions.getDefaultInstance()).build()) + .getCommand(), LOG, "JobMasterHeartbeat", "masterId=%d", masterId); + } + + /** + * Registers with the leader master. 
+ * + * @param masterId the master id of the standby master registering + */ + public void register(final long masterId) + throws IOException { + final Map gauges = MetricsSystem.METRIC_REGISTRY.getGauges(); + RegisterJobMasterPOptions.Builder optionsBuilder = RegisterJobMasterPOptions.newBuilder() + .setVersion(BuildVersion.newBuilder().setVersion(RuntimeConstants.VERSION) + .setRevision(RuntimeConstants.REVISION_SHORT).build()); + Gauge startTimeGauge = gauges.get(MetricKey.MASTER_START_TIME.getName()); + if (startTimeGauge != null) { + optionsBuilder.setStartTimeMs((long) startTimeGauge.getValue()); + } + Gauge lastLosePrimacyGuage = gauges.get(MetricKey.MASTER_LAST_LOSE_PRIMACY_TIME.getName()); + if (lastLosePrimacyGuage != null) { + optionsBuilder.setLosePrimacyTimeMs((long) lastLosePrimacyGuage.getValue()); + } + retryRPC(() -> { + mClient.registerMaster(RegisterJobMasterPRequest.newBuilder().setJobMasterId(masterId) + .setOptions(optionsBuilder).build()); + return null; + }, LOG, "Register", "jobMasterId=%d", masterId); + } +} diff --git a/job/server/src/main/java/alluxio/worker/job/RetryHandlingJobMasterClient.java b/job/server/src/main/java/alluxio/worker/job/RetryHandlingJobMasterClient.java index 0684f04c3b4c..a0ce8d883ebd 100644 --- a/job/server/src/main/java/alluxio/worker/job/RetryHandlingJobMasterClient.java +++ b/job/server/src/main/java/alluxio/worker/job/RetryHandlingJobMasterClient.java @@ -13,6 +13,7 @@ import alluxio.AbstractJobMasterClient; import alluxio.Constants; +import alluxio.RuntimeConstants; import alluxio.grpc.GrpcUtils; import alluxio.grpc.JobCommand; import alluxio.grpc.JobHeartbeatPRequest; @@ -73,7 +74,9 @@ protected void afterConnect() { @Override public long registerWorker(final WorkerNetAddress address) throws IOException { return retryRPC(() -> mClient.registerJobWorker(RegisterJobWorkerPRequest.newBuilder() - .setWorkerNetAddress(GrpcUtils.toProto(address)).build()).getId(), + .setWorkerNetAddress(GrpcUtils.toProto(address)) 
+ .setVersion(RuntimeConstants.CURRENT_VERSION_INFO) + .build()).getId(), RPC_LOG, "RegisterWorker", "address=%s", address); } diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommand.java index d688b5c810fa..030f63203405 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommand.java @@ -12,6 +12,8 @@ package alluxio.cli.fsadmin.report; import alluxio.client.job.JobMasterClient; +import alluxio.grpc.JobMasterStatus; +import alluxio.grpc.NetAddress; import alluxio.job.wire.JobInfo; import alluxio.job.wire.JobServiceSummary; import alluxio.job.wire.JobWorkerHealth; @@ -22,8 +24,14 @@ import java.io.IOException; import java.io.PrintStream; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; import java.util.Collection; +import java.util.Comparator; import java.util.List; +import java.util.Locale; /** * Prints job service metric information. @@ -48,23 +56,43 @@ public JobServiceMetricsCommand(JobMasterClient JobMasterClient, PrintStream pri mDateFormatPattern = dateFormatPattern; } + public static final DateTimeFormatter DATETIME_FORMAT = + DateTimeFormatter.ofLocalizedDateTime(FormatStyle.SHORT).ofPattern("yyyyMMdd-HHmmss") + .withLocale(Locale.getDefault()).withZone(ZoneId.systemDefault()); + /** * Runs a job services report metrics command. 
* * @return 0 on success, 1 otherwise */ public int run() throws IOException { + List allMasterStatus = mJobMasterClient.getAllMasterStatus(); + String masterFormat = getMasterInfoFormat(allMasterStatus); + mPrintStream.printf(masterFormat, "Master Address", "State", "Start Time", + "Version", "Revision"); + for (JobMasterStatus masterStatus : allMasterStatus) { + NetAddress address = masterStatus.getMasterAddress(); + mPrintStream.printf(masterFormat, + address.getHost() + ":" + address.getRpcPort(), + masterStatus.getState(), + DATETIME_FORMAT.format(Instant.ofEpochMilli(masterStatus.getStartTime())), + masterStatus.getVersion().getVersion(), + masterStatus.getVersion().getRevision()); + } + mPrintStream.println(); + List allWorkerHealth = mJobMasterClient.getAllWorkerHealth(); + String workerFormat = getWorkerInfoFormat(allWorkerHealth); + mPrintStream.printf(workerFormat, "Job Worker", "Version", "Revision", "Task Pool Size", + "Unfinished Tasks", "Active Tasks", "Load Avg"); for (JobWorkerHealth workerHealth : allWorkerHealth) { - mPrintStream.print(String.format("Worker: %-10s ", workerHealth.getHostname())); - mPrintStream.print(String.format("Task Pool Size: %-7s", workerHealth.getTaskPoolSize())); - mPrintStream.print(String.format("Unfinished Tasks: %-7s", - workerHealth.getUnfinishedTasks())); - mPrintStream.print(String.format("Active Tasks: %-7s", - workerHealth.getNumActiveTasks())); - mPrintStream.println(String.format("Load Avg: %s", - StringUtils.join(workerHealth.getLoadAverage(), ", "))); + mPrintStream.printf(workerFormat, + workerHealth.getHostname(), workerHealth.getVersion().getVersion(), + workerHealth.getVersion().getRevision(), + workerHealth.getTaskPoolSize(), workerHealth.getUnfinishedTasks(), + workerHealth.getNumActiveTasks(), + StringUtils.join(workerHealth.getLoadAverage(), ", ")); } mPrintStream.println(); @@ -99,6 +127,32 @@ public int run() throws IOException { return 0; } + private String getMasterInfoFormat(List masters) { + int 
maxNameLength = 16; + if (masters.size() > 0) { + maxNameLength = masters.stream().map(m -> m.getMasterAddress().getHost().length() + 6) + .max(Comparator.comparing(Integer::intValue)).get(); + } + // hostname:port + state + startTime + version + revision + return "%-" + maxNameLength + "s %-8s %-16s %-32s %-8s%n"; + } + + private String getWorkerInfoFormat(List workers) { + int maxNameLength = 16; + if (workers.size() > 0) { + maxNameLength = workers.stream().map(w -> w.getHostname().length()) + .max(Comparator.comparing(Integer::intValue)).get(); + } + int firstIndent = 16; + if (firstIndent <= maxNameLength) { + // extend first indent according to the longest worker name + firstIndent = maxNameLength + 6; + } + + // hostname + version + revision + poolSize + unfinishedTasks + activeTasks + loadAvg + return "%-" + firstIndent + "s %-32s %-8s %-14s %-16s %-12s %s%n"; + } + private void printJobInfos(List jobInfos) { for (JobInfo jobInfo : jobInfos) { mPrintStream.print(String.format("Timestamp: %-30s", diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommandTest.java index cecca63554b0..c279a9e75614 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/JobServiceMetricsCommandTest.java @@ -12,8 +12,12 @@ package alluxio.cli.fsadmin.report; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import alluxio.client.job.JobMasterClient; +import alluxio.grpc.BuildVersion; +import alluxio.grpc.JobMasterStatus; +import alluxio.grpc.NetAddress; import alluxio.job.wire.JobInfo; import alluxio.job.wire.JobServiceSummary; import alluxio.job.wire.JobWorkerHealth; @@ -33,6 +37,7 @@ import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.Instant; import 
java.util.ArrayList; import java.util.List; @@ -57,10 +62,34 @@ public void after() { @Test public void testBasic() throws IOException, ParseException { + long now = Instant.now().toEpochMilli(); + String startTimeStr = JobServiceMetricsCommand.DATETIME_FORMAT + .format(Instant.ofEpochMilli(now)); + JobMasterStatus primaryMaster = JobMasterStatus.newBuilder() + .setMasterAddress(NetAddress.newBuilder() + .setHost("master-node-1").setRpcPort(19998).build()) + .setState("PRIMARY").setStartTime(now).setVersion(BuildVersion.newBuilder() + .setVersion("alluxio-version-2.9").setRevision("abcdef").build()).build(); + JobMasterStatus standbyMaster1 = JobMasterStatus.newBuilder() + .setMasterAddress(NetAddress.newBuilder() + .setHost("master-node-0").setRpcPort(19998).build()) + .setState("STANDBY").setStartTime(now).setVersion( + BuildVersion.newBuilder().setVersion("alluxio-version-2.10") + .setRevision("abcdef").build()).build(); + JobMasterStatus standbyMaster2 = JobMasterStatus.newBuilder() + .setMasterAddress(NetAddress.newBuilder() + .setHost("master-node-2").setRpcPort(19998).build()) + .setState("STANDBY").setStartTime(now).setVersion( + BuildVersion.newBuilder().setVersion("alluxio-version-2.10") + .setRevision("bcdefg").build()).build(); + Mockito.when(mJobMasterClient.getAllMasterStatus()) + .thenReturn(Lists.newArrayList(primaryMaster, standbyMaster1, standbyMaster2)); JobWorkerHealth jobWorkerHealth = new JobWorkerHealth( - 1, Lists.newArrayList(1.2, 0.9, 0.7), 10, 2, 2, "testHost"); - + 1, Lists.newArrayList(1.2, 0.9, 0.7), + 10, 2, 2, "testHost", + BuildVersion.newBuilder() + .setVersion("2.10.0-SNAPSHOT").setRevision("ac6a0616").build()); Mockito.when(mJobMasterClient.getAllWorkerHealth()) .thenReturn(Lists.newArrayList(jobWorkerHealth)); @@ -78,14 +107,29 @@ public void testBasic() throws IOException, ParseException { String[] lineByLine = output.split("\n"); + // Master Status Section + assertTrue(lineByLine[0].contains("Master Address State Start Time " 
+ + "Version Revision")); + assertTrue(lineByLine[1].contains("master-node-1:19998 PRIMARY")); + assertTrue(lineByLine[1].contains(startTimeStr)); + assertTrue(lineByLine[1].contains("alluxio-version-2.9 abcdef")); + assertTrue(lineByLine[2].contains("master-node-0:19998 STANDBY")); + assertTrue(lineByLine[2].contains(startTimeStr)); + assertTrue(lineByLine[2].contains("alluxio-version-2.10 abcdef")); + assertTrue(lineByLine[3].contains("master-node-2:19998 STANDBY")); + assertTrue(lineByLine[3].contains(startTimeStr)); + assertTrue(lineByLine[3].contains("alluxio-version-2.10 bcdefg")); + // Worker Health Section - assertEquals("Worker: testHost Task Pool Size: 10 Unfinished Tasks: 2" - + " Active Tasks: 2 Load Avg: 1.2, 0.9, 0.7", - lineByLine[0]); - assertEquals("", lineByLine[1]); + assertTrue(lineByLine[5].contains("Job Worker Version " + + "Revision Task Pool Size Unfinished Tasks Active Tasks Load Avg")); + assertTrue(lineByLine[6].contains("testHost 2.10.0-SNAPSHOT " + + "ac6a0616")); + assertTrue(lineByLine[6].contains("10 2 2 " + + "1.2, 0.9, 0.7")); // Group By Status - lineByLine = ArrayUtils.subarray(lineByLine, 2, lineByLine.length); + lineByLine = ArrayUtils.subarray(lineByLine, 8, lineByLine.length); assertEquals("Status: CREATED Count: 0", lineByLine[0]); assertEquals("Status: CANCELED Count: 0", lineByLine[1]); @@ -124,7 +168,6 @@ private JobInfo createJobInfo(int id, String name, Status status, String datetim throws ParseException { long timeMillis = new SimpleDateFormat("yyyy-mm-dd HH:mm:ss").parse(datetime).getTime(); PlanInfo jobInfo = new PlanInfo(id, name, status, timeMillis, null); - return jobInfo; } } From 32f923e8a303301705eeb8bd4e5c8ee3b361875c Mon Sep 17 00:00:00 2001 From: jja725 Date: Wed, 26 Apr 2023 18:35:53 -0700 Subject: [PATCH 249/334] Support getting real content hash from hdfs when using getFileStatus ### What changes are proposed in this pull request? Support getting real content hash from hdfs ### Why are the changes needed? 
for data integrity while moving data between object store & HDFS ### Does this PR introduce any user facing changes? na pr-link: Alluxio/alluxio#17207 change-id: cid-b2dbbde6082a3fd771b988f3ba4430fb2506f559 --- .../client/file/ufs/UfsBaseFileSystem.java | 14 +++++-- .../ManagedBlockingUfsForwarder.java | 3 +- .../main/java/alluxio/conf/PropertyKey.java | 8 ++++ .../underfs/ObjectUnderFileSystem.java | 3 +- .../java/alluxio/underfs/UnderFileSystem.java | 16 ++++++- .../underfs/UnderFileSystemWithLogging.java | 4 +- .../underfs/options/GetFileStatusOptions.java | 42 +++++++++++++++++++ .../main/proto/grpc/file_system_master.proto | 1 + core/transport/src/main/proto/proto.lock | 5 +++ .../delegating/DelegatingUnderFileSystem.java | 5 ++- .../sleeping/SleepingUnderFileSystem.java | 3 +- .../underfs/cephfs/CephFSUnderFileSystem.java | 5 ++- .../underfs/hdfs/HdfsUnderFileSystem.java | 23 ++++++++-- .../underfs/local/LocalUnderFileSystem.java | 3 +- .../underfs/web/WebUnderFileSystem.java | 3 +- 15 files changed, 121 insertions(+), 17 deletions(-) create mode 100644 core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 3d8f1befbcfc..77c2f4c6d786 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -46,12 +46,14 @@ import alluxio.resource.CloseableResource; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.Mode; +import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsFileStatus; import alluxio.underfs.UfsManager; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; +import 
alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -218,8 +220,10 @@ public URIStatus getStatus(AlluxioURI path) { public URIStatus getStatus(AlluxioURI path, final GetStatusPOptions options) { return callWithReturn(() -> { String ufsPath = path.getPath(); - return transformStatus(mUfs.get().isFile(ufsPath) - ? mUfs.get().getFileStatus(ufsPath) : mUfs.get().getDirectoryStatus(ufsPath)); + return transformStatus(mUfs.get().isFile(ufsPath) ? mUfs.get().getFileStatus(ufsPath, + GetFileStatusOptions.defaults() + .setIncludeRealContentHash(options.getIncludeRealContentHash())) : + mUfs.get().getDirectoryStatus(ufsPath)); }); } @@ -439,7 +443,11 @@ private URIStatus transformStatus(UfsStatus ufsStatus) { UfsFileStatus fileStatus = (UfsFileStatus) ufsStatus; info.setLength(fileStatus.getContentLength()); info.setBlockSizeBytes(fileStatus.getBlockSize()); - } else { + info.setUfsFingerprint( + Fingerprint.create(mUfs.get().getUnderFSType(), ufsStatus, fileStatus.getContentHash()) + .serialize()); + } + else { info.setLength(0); } return new URIStatus(info); diff --git a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java index cc3f9f3af616..f9cd4cdd4111 100755 --- a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java +++ b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java @@ -28,6 +28,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -260,7 +261,7 @@ public List execute() throws IOException { } 
@Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { return new ManagedBlockingUfsMethod() { @Override public UfsFileStatus execute() throws IOException { diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 9d6f1150c446..9ce9ac5ee79b 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7637,6 +7637,12 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) .setScope(Scope.SERVER) .build(); + public static final PropertyKey HADOOP_CHECKSUM_COMBINE_MODE = + booleanBuilder(Name.HADOOP_CHECKSUM_COMBINE_MODE) + .setDescription("File Checksum combine mode.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); /** * @deprecated This key is used for testing. It is always deprecated. 
*/ @@ -9185,6 +9191,8 @@ public static final class Name { public static final String HADOOP_KERBEROS_KEYTAB_LOGIN_AUTORENEWAL = "alluxio.hadoop.kerberos.keytab.login.autorenewal"; + public static final String HADOOP_CHECKSUM_COMBINE_MODE = + "alluxio.hadoop.checksum.combine.mode"; private Name() {} // prevent instantiation } diff --git a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java index 456f088f8f3f..527149f2653f 100755 --- a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java @@ -22,6 +22,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -511,7 +512,7 @@ public long getSpace(String path, SpaceType type) throws IOException { } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { ObjectStatus details = getObjectStatus(stripPrefixIfPresent(path)); if (details != null) { ObjectPermissions permissions = getPermissions(); diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java index 783afca7eac8..a680f0ff9c31 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java @@ -24,6 +24,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import 
alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -422,7 +423,20 @@ default AlluxioConfiguration getConfiguration() throws IOException { * @return the file status * @throws FileNotFoundException when the path does not exist */ - UfsFileStatus getFileStatus(String path) throws IOException; + default UfsFileStatus getFileStatus(String path) throws IOException { + return getFileStatus(path, GetFileStatusOptions.defaults()); + } + + /** + * Gets the file status. The caller must already know the path is a file. This method will + * throw an exception if the path exists, but is a directory. + * + * @param path the path to the file + * @param options method options + * @return the file status + * @throws FileNotFoundException when the path does not exist + */ + UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException; /** * Gets the file status. diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java index 9694c6125a3c..0cf021796834 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java @@ -28,6 +28,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -520,7 +521,8 @@ public String toString() { } @Override - public UfsFileStatus getFileStatus(final String path) throws IOException { + public UfsFileStatus getFileStatus(final String path, GetFileStatusOptions options) + throws IOException { return call(new UfsCallable() { @Override public UfsFileStatus call() throws 
IOException { diff --git a/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java b/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java new file mode 100644 index 000000000000..d9a55c73e691 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/options/GetFileStatusOptions.java @@ -0,0 +1,42 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.options; + +/** + * Method options for getting the status of a file in {@link alluxio.underfs.UnderFileSystem}. 
+ */ +public class GetFileStatusOptions { + private boolean mIncludeRealContentHash = false; + + /** + * @return whether include real content hash + */ + public boolean isIncludeRealContentHash() { + return mIncludeRealContentHash; + } + + /** + * @param includeRealContentHash include real content hash flag value + * @return the updated options object + */ + public GetFileStatusOptions setIncludeRealContentHash(boolean includeRealContentHash) { + mIncludeRealContentHash = includeRealContentHash; + return this; + } + + /** + * @return the default {@link GetFileStatusOptions} + */ + public static GetFileStatusOptions defaults() { + return new GetFileStatusOptions(); + } +} diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index ad680ad10ae5..5dde330eb43a 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -202,6 +202,7 @@ message GetStatusPOptions { optional FileSystemMasterCommonPOptions commonOptions = 2; optional Bits accessMode = 3; optional bool updateTimestamps = 4 [default = true]; + optional bool includeRealContentHash = 5; } message GetStatusPRequest { /** the path of the file or directory */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 312a4ed0844e..d169882e78be 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2688,6 +2688,11 @@ "value": "true" } ] + }, + { + "id": 5, + "name": "includeRealContentHash", + "type": "bool" } ] }, diff --git a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java index c8c8517efd48..366f56200e05 100755 --- a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java +++ 
b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java @@ -27,6 +27,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -165,8 +166,8 @@ public List getFileLocations(String path, FileLocationOptions options) } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { - return mUfs.getFileStatus(path); + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { + return mUfs.getFileStatus(path, options); } @Override diff --git a/tests/src/test/java/alluxio/testutils/underfs/sleeping/SleepingUnderFileSystem.java b/tests/src/test/java/alluxio/testutils/underfs/sleeping/SleepingUnderFileSystem.java index 10b2da3924a9..ae63f1d42b4f 100755 --- a/tests/src/test/java/alluxio/testutils/underfs/sleeping/SleepingUnderFileSystem.java +++ b/tests/src/test/java/alluxio/testutils/underfs/sleeping/SleepingUnderFileSystem.java @@ -21,6 +21,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; @@ -138,7 +139,7 @@ public UfsStatus getStatus(String path) throws IOException { } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { sleepIfNecessary(mOptions.getGetFileStatusMs()); return super.getFileStatus(cleanPath(path)); } diff --git 
a/underfs/cephfs/src/main/java/alluxio/underfs/cephfs/CephFSUnderFileSystem.java b/underfs/cephfs/src/main/java/alluxio/underfs/cephfs/CephFSUnderFileSystem.java index 44ba1c7c3a48..9fa4d6aaf71e 100644 --- a/underfs/cephfs/src/main/java/alluxio/underfs/cephfs/CephFSUnderFileSystem.java +++ b/underfs/cephfs/src/main/java/alluxio/underfs/cephfs/CephFSUnderFileSystem.java @@ -28,6 +28,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; import alluxio.util.UnderFileSystemUtils; @@ -381,17 +382,17 @@ public List getFileLocations(String path, FileLocationOptions options) * Ceph's support for these is a bit different. * * @param path The path to stat + * @param options method options * @return FileStatus object containing the stat information * @throws FileNotFoundException if the path could not be resolved */ @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { path = stripPath(path); CephStat stat = new CephStat(); lstat(path, stat); String contentHash = UnderFileSystemUtils.approximateContentHash(stat.size, stat.m_time); - return new UfsFileStatus(path, contentHash, stat.size, stat.m_time, "", "", (short) stat.mode); } diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java index 630bb683addf..2c876fa0d979 100755 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java @@ -33,6 +33,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import 
alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; import alluxio.util.CommonUtils; @@ -43,6 +44,7 @@ import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; +import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -109,6 +111,9 @@ public class HdfsUnderFileSystem extends ConsistentUnderFileSystem private static final String KRB_KEYTAB_LOGIN_AUTO_RENEW = "hadoop.kerberos.keytab.login.autorenewal.enabled"; + private static final String CHECKSUM_COMBINE_MODE = + "dfs.checksum.combine.mode"; + private final LoadingCache mUserFs; private final HdfsAclProvider mHdfsAclProvider; @@ -178,6 +183,11 @@ public HdfsUnderFileSystem(AlluxioURI ufsUri, UnderFileSystemConfiguration conf, hdfsConf.setBoolean(KRB_KEYTAB_LOGIN_AUTO_RENEW, mUfsConf.getBoolean(PropertyKey.HADOOP_KERBEROS_KEYTAB_LOGIN_AUTORENEWAL)); } + if (mUfsConf.isSet(PropertyKey.HADOOP_CHECKSUM_COMBINE_MODE)) { + hdfsConf.set(CHECKSUM_COMBINE_MODE, + mUfsConf.getString(PropertyKey.HADOOP_CHECKSUM_COMBINE_MODE)); + } + // Set Hadoop UGI configuration to ensure UGI can be initialized by the shaded classes for // group service. 
UserGroupInformation.setConfiguration(hdfsConf); @@ -428,12 +438,19 @@ public List getFileLocations(String path, FileLocationOptions options) } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { Path tPath = new Path(path); FileSystem hdfs = getFs(); FileStatus fs = hdfs.getFileStatus(tPath); - String contentHash = - UnderFileSystemUtils.approximateContentHash(fs.getLen(), fs.getModificationTime()); + String contentHash; + if (options.isIncludeRealContentHash()) { + contentHash = Base64.encodeBase64String(hdfs.getFileChecksum(tPath).getBytes()); + } + else { + contentHash = + UnderFileSystemUtils.approximateContentHash(fs.getLen(), fs.getModificationTime()); + } + return new UfsFileStatus(path, contentHash, fs.getLen(), fs.getModificationTime(), fs.getOwner(), fs.getGroup(), fs.getPermission().toShort(), fs.getBlockSize()); } diff --git a/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java b/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java index 78bcd9ed913e..03d06918ec31 100755 --- a/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java +++ b/underfs/local/src/main/java/alluxio/underfs/local/LocalUnderFileSystem.java @@ -27,6 +27,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; import alluxio.util.UnderFileSystemUtils; @@ -227,7 +228,7 @@ public List getFileLocations(String path, FileLocationOptions options) } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { String tpath = stripPath(path); File 
file = new File(tpath); try { diff --git a/underfs/web/src/main/java/alluxio/underfs/web/WebUnderFileSystem.java b/underfs/web/src/main/java/alluxio/underfs/web/WebUnderFileSystem.java index 9e5537c0e525..cdce2dd37f87 100755 --- a/underfs/web/src/main/java/alluxio/underfs/web/WebUnderFileSystem.java +++ b/underfs/web/src/main/java/alluxio/underfs/web/WebUnderFileSystem.java @@ -22,6 +22,7 @@ import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.FileLocationOptions; +import alluxio.underfs.options.GetFileStatusOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; import alluxio.util.UnderFileSystemUtils; @@ -132,7 +133,7 @@ public List getFileLocations(String path, FileLocationOptions options) } @Override - public UfsFileStatus getFileStatus(String path) throws IOException { + public UfsFileStatus getFileStatus(String path, GetFileStatusOptions options) throws IOException { UfsStatus ufsStatus = getStatus(path); if (ufsStatus instanceof UfsFileStatus) { return (UfsFileStatus) ufsStatus; From 764d3c8d0cd7d24db4b18f6a6569ba05202b9b6f Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Thu, 27 Apr 2023 12:55:23 +0800 Subject: [PATCH 250/334] Avoid null when using BlockMasterClientPool ### What changes are proposed in this pull request? Avoid using mBlockMasterClientPool directly. ### Why are the changes needed? Using mBlockMasterClientPool during reinit may throw an exception. 
pr-link: Alluxio/alluxio#17326 change-id: cid-1b43697d52fe43065b71208cd5594c248a8c4fe9 --- .../main/java/alluxio/client/file/FileSystemContext.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java index 8e0cbb95c238..b8e1c1964b67 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java @@ -719,11 +719,9 @@ private void initializeLocalWorker() throws IOException { */ private List getWorkerAddresses() throws IOException { List infos; - BlockMasterClient blockMasterClient = mBlockMasterClientPool.acquire(); - try { - infos = blockMasterClient.getWorkerInfoList(); - } finally { - mBlockMasterClientPool.release(blockMasterClient); + try (CloseableResource masterClientResource = + acquireBlockMasterClientResource()) { + infos = masterClientResource.get().getWorkerInfoList(); } if (infos.isEmpty()) { throw new UnavailableException(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage()); From 141ee0e567a4c6c60907f85ea66b828704bd761d Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Thu, 27 Apr 2023 23:45:31 +0800 Subject: [PATCH 251/334] Support gracefully shutdown worker Cherry-pick of existing commit. 
orig-pr: TachyonNexus/enterprise#4058 orig-commit: TachyonNexus/enterprise@85246fe11db2f57d2eddfcd36875dd098f985074 orig-commit-author: Jiacheng Liu pr-link: Alluxio/alluxio#17310 change-id: cid-bf474aac505427be6180828a158b5c23825b4d93 --- .../client/block/BlockMasterClient.java | 7 +- .../block/RetryHandlingBlockMasterClient.java | 13 +- .../client/file/AlluxioFileInStream.java | 5 +- .../alluxio/exception/ExceptionMessage.java | 5 + .../main/java/alluxio/metrics/MetricKey.java | 12 + .../alluxio/wire/WorkerWebUIOperations.java | 78 +++ .../executor/ExecutorServiceBuilder.java | 16 +- .../master/AlluxioExecutorService.java | 124 +++- core/server/master/pom.xml | 5 + .../alluxio/master/block/BlockMaster.java | 18 +- .../BlockMasterClientServiceHandler.java | 38 +- .../master/block/DefaultBlockMaster.java | 235 ++++++-- .../alluxio/master/block/BlockMasterTest.java | 566 +++++++++++++++++- .../AlluxioWorkerRestServiceHandler.java | 52 ++ .../alluxio/worker/block/BlockMasterSync.java | 6 + .../worker/block/DefaultBlockWorker.java | 2 + .../worker/block/RegisterStreamer.java | 63 +- .../alluxio/worker/grpc/GrpcDataServer.java | 5 +- .../alluxio/worker/grpc/GrpcExecutors.java | 133 +++- .../src/main/proto/grpc/block_master.proto | 15 +- .../src/main/proto/grpc/common.proto | 2 + core/transport/src/main/proto/proto.lock | 37 +- .../fs/command/DecommissionWorkerCommand.java | 112 ---- .../cli/fs/command/FreeWorkerCommand.java | 12 +- .../command/DecommissionWorkerCommand.java | 546 +++++++++++++++++ .../fsadmin/command/EnableWorkerCommand.java | 131 ++++ .../fsadmin/command/WorkerAddressUtils.java | 95 +++ ...leInStreamDecommissionIntegrationTest.java | 394 ++++++++++++ ...eOutStreamDecommissionIntegrationTest.java | 501 ++++++++++++++++ 29 files changed, 2976 insertions(+), 252 deletions(-) create mode 100644 core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java delete mode 100644 shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java 
create mode 100644 shell/src/main/java/alluxio/cli/fsadmin/command/DecommissionWorkerCommand.java create mode 100644 shell/src/main/java/alluxio/cli/fsadmin/command/EnableWorkerCommand.java create mode 100644 shell/src/main/java/alluxio/cli/fsadmin/command/WorkerAddressUtils.java create mode 100644 tests/src/test/java/alluxio/client/fs/io/FileInStreamDecommissionIntegrationTest.java create mode 100644 tests/src/test/java/alluxio/client/fs/io/FileOutStreamDecommissionIntegrationTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java index 5051c2d2c0db..12649a817187 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/BlockMasterClient.java @@ -15,6 +15,7 @@ import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.exception.status.AlluxioStatusException; import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; import alluxio.wire.BlockInfo; @@ -59,11 +60,11 @@ public static BlockMasterClient create(MasterClientContext conf) { List getWorkerInfoList() throws IOException; /** - * Remove the metadata of a decommissioned worker. + * Revert disabling a worker, enabling it to register to the cluster. * - * @param workerName contains a string, representing the workerName + * @param options contains the info used to find the target worker(s) */ - void removeDecommissionedWorker(String workerName) throws IOException; + void removeDisabledWorker(RemoveDisabledWorkerPOptions options) throws IOException; /** * Gets the worker information of selected workers and selected fields for report CLI. 
diff --git a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java index 55b1f53dab47..7c1652156d91 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/block/RetryHandlingBlockMasterClient.java @@ -23,7 +23,7 @@ import alluxio.grpc.GetWorkerInfoListPOptions; import alluxio.grpc.GetWorkerLostStoragePOptions; import alluxio.grpc.GrpcUtils; -import alluxio.grpc.RemoveDecommissionedWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.ServiceType; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.MasterClientContext; @@ -121,10 +121,9 @@ public List getWorkerInfoList() throws IOException { } @Override - public void removeDecommissionedWorker(String workerName) throws IOException { - retryRPC(() -> mClient.removeDecommissionedWorker(RemoveDecommissionedWorkerPOptions - .newBuilder().setWorkerName(workerName).build()), - RPC_LOG, "RemoveDecommissionedWorker", ""); + public void removeDisabledWorker(RemoveDisabledWorkerPOptions options) throws IOException { + retryRPC(() -> mClient.removeDisabledWorker(options), + RPC_LOG, "RemoveDisabledWorker", ""); } @Override @@ -182,7 +181,7 @@ public long getUsedBytes() throws IOException { @Override public void decommissionWorker(DecommissionWorkerPOptions options) throws IOException { retryRPC(() -> mClient.decommissionWorker(options), - RPC_LOG, "DecommissionWorker", "workerName=%s,options=%s", - options.getWorkerName(), options); + RPC_LOG, "DecommissionWorker", "workerHostName=%s,workerWebPort=%s,options=%s", + options.getWorkerHostname(), options.getWorkerWebPort(), options); } } diff --git a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java index 
afa5a452d1b8..83769689b75b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java +++ b/core/client/fs/src/main/java/alluxio/client/file/AlluxioFileInStream.java @@ -48,6 +48,7 @@ import java.nio.ByteBuffer; import java.time.Duration; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import javax.annotation.concurrent.NotThreadSafe; @@ -389,6 +390,7 @@ private void updateStream() throws IOException { throw new IOException("No BlockInfo for block(id=" + blockId + ") of file" + "(id=" + mStatus.getFileId() + ", path=" + mStatus.getPath() + ")"); } + // Create stream boolean isBlockInfoOutdated = true; // blockInfo is "outdated" when all the locations in that blockInfo are failed workers, @@ -396,7 +398,8 @@ private void updateStream() throws IOException { if (mFailedWorkers.isEmpty() || mFailedWorkers.size() < blockInfo.getLocations().size()) { isBlockInfoOutdated = false; } else { - for (BlockLocation location : blockInfo.getLocations()) { + List locs = blockInfo.getLocations(); + for (BlockLocation location : locs) { if (!mFailedWorkers.containsKey(location.getWorkerAddress())) { isBlockInfoOutdated = false; break; diff --git a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java index 34ef67c9beef..09bc977dee5f 100644 --- a/core/common/src/main/java/alluxio/exception/ExceptionMessage.java +++ b/core/common/src/main/java/alluxio/exception/ExceptionMessage.java @@ -118,6 +118,11 @@ public enum ExceptionMessage { // file system master ufs FAILED_UFS_RENAME("Failed to rename {0} to {1} in the under file system"), + // worker + WORKER_NOT_FOUND("Worker {0} not found"), + WORKER_DECOMMISSIONED_BEFORE_REGISTER("Attempting to decommission an unregistered worker {0}. 
" + + "Please wait until this worker is registered."), + // cli INVALID_ARGS_NULL("Null args for command {0}"), INVALID_ARGS_NUM("Command {0} takes {1} arguments, not {2}"), diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index e82b2972a788..a56d95df27b3 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -1754,12 +1754,24 @@ public static String getSyncMetricName(long mountId) { .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(true) .build(); + /* + * This metric is inaccurate because it is updated at so many places. + * Given time, it should be deprecated and replaced by WORKER_ACTIVE_OPERATIONS. + */ + @Deprecated public static final MetricKey WORKER_ACTIVE_CLIENTS = new Builder("Worker.ActiveClients") .setDescription("The number of clients actively reading from or writing to this worker") .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(true) .build(); + public static final MetricKey WORKER_ACTIVE_OPERATIONS = + new Builder("Worker.ActiveOperations") + .setDescription("The number of active RPCs in the worker, including control RPCs " + + "and data I/O. Used to tell if the worker is idle or busy.") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); public static final MetricKey WORKER_BLOCKS_ACCESSED = new Builder("Worker.BlocksAccessed") .setDescription("Total number of times any one of the blocks in this worker is accessed.") diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java b/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java new file mode 100644 index 000000000000..daef90540cba --- /dev/null +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUIOperations.java @@ -0,0 +1,78 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.wire; + +import com.google.common.base.MoreObjects; + +import java.io.Serializable; +import javax.annotation.concurrent.NotThreadSafe; + +/** + * Alluxio WebUI overview information. + */ +@NotThreadSafe +public final class WorkerWebUIOperations implements Serializable { + private static final long serialVersionUID = 5444572986825500733L; + + private long mOperationCount; + private long mRpcQueueLength; + + /** + * Creates a new instance of {@link WorkerWebUIInit}. + */ + public WorkerWebUIOperations() { + } + + /** + * Gets the operation count. + * @return the number of operations + */ + public long getOperationCount() { + return mOperationCount; + } + + /** + * Gets the current RPC queue length. + * @return the RPC queue length + */ + public long getRpcQueueLength() { + return mRpcQueueLength; + } + + /** + * Sets the operation count. + * @param operationCount the operation count + * @return the current obj + */ + public WorkerWebUIOperations setOperationCount(long operationCount) { + mOperationCount = operationCount; + return this; + } + + /** + * Sets the RPC queue length. 
+ * @param rpcQueueLength queue length + * @return the current obj + */ + public WorkerWebUIOperations setRpcQueueLength(long rpcQueueLength) { + mRpcQueueLength = rpcQueueLength; + return this; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("operationCount", mOperationCount) + .add("rpcQueueLength", mRpcQueueLength) + .toString(); + } +} diff --git a/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java b/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java index d1633848b7fb..a040c776156e 100644 --- a/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java +++ b/core/server/common/src/main/java/alluxio/executor/ExecutorServiceBuilder.java @@ -17,6 +17,7 @@ import alluxio.master.AlluxioExecutorService; import alluxio.util.ThreadFactoryUtils; +import com.codahale.metrics.Counter; import com.google.common.base.Preconditions; import java.util.concurrent.ArrayBlockingQueue; @@ -26,6 +27,7 @@ import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; /** * Used to create {@link ExecutorService} instances dynamically by configuration. @@ -38,6 +40,18 @@ public class ExecutorServiceBuilder { * @return instance of {@link ExecutorService} */ public static AlluxioExecutorService buildExecutorService(RpcExecutorHost executorHost) { + return buildExecutorService(executorHost, null); + } + + /** + * Creates an {@link ExecutorService} for given Alluxio process dynamically by configuration. + * + * @param executorHost Where the executor is needed + * @param rpcCounter the counter to track ongoing RPC + * @return instance of {@link ExecutorService} + */ + public static AlluxioExecutorService buildExecutorService( + RpcExecutorHost executorHost, @Nullable Counter rpcCounter) { // Get executor type for given host. 
RpcExecutorType executorType = Configuration.getEnum( PropertyKey.Template.RPC_EXECUTOR_TYPE.format(executorHost.toString()), @@ -123,7 +137,7 @@ public static AlluxioExecutorService buildExecutorService(RpcExecutorHost execut // Post settings. ((ThreadPoolExecutor) executorService).allowCoreThreadTimeOut(allowCoreThreadsTimeout); } - return new AlluxioExecutorService(executorService); + return new AlluxioExecutorService(executorService, rpcCounter); } /** diff --git a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java index a18c82554ca8..9cfb6e36d1d6 100644 --- a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java +++ b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java @@ -13,21 +13,26 @@ import alluxio.concurrent.jsr.ForkJoinPool; +import com.codahale.metrics.Counter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.Collection; import java.util.List; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; /** * Forwarder over ExecutorService interface for exposing internal queue length. */ public class AlluxioExecutorService implements ExecutorService { + private static final Logger LOG = LoggerFactory.getLogger(AlluxioExecutorService.class); + private ExecutorService mExecutor; + private final Counter mRpcTracker; /** * Creates Alluxio ExecutorService wrapper. @@ -36,12 +41,24 @@ public class AlluxioExecutorService implements ExecutorService { */ public AlluxioExecutorService(ExecutorService executor) { mExecutor = executor; + mRpcTracker = null; + } + + /** + * Creates Alluxio ExecutorService wrapper. 
+ * + * @param executor underlying executor + * @param counter the counter to track active operations + */ + public AlluxioExecutorService(ExecutorService executor, Counter counter) { + mExecutor = executor; + mRpcTracker = counter; } /** * @return the current RPC queue size */ - public long getRpcQueueLength() { + public int getRpcQueueLength() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getQueue().size(); } else if (mExecutor instanceof ForkJoinPool) { @@ -55,7 +72,7 @@ public long getRpcQueueLength() { /** * @return the current RPC active thread count */ - public long getActiveCount() { + public int getActiveCount() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getActiveCount(); } else if (mExecutor instanceof ForkJoinPool) { @@ -69,7 +86,7 @@ public long getActiveCount() { /** * @return the current RPC thread pool size */ - public long getPoolSize() { + public int getPoolSize() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getPoolSize(); } else if (mExecutor instanceof ForkJoinPool) { @@ -82,11 +99,23 @@ public long getPoolSize() { @Override public void shutdown() { + if (mRpcTracker != null) { + long activeRpcCount = mRpcTracker.getCount(); + if (activeRpcCount > 0) { + LOG.warn("{} operations have not completed", activeRpcCount); + } + } mExecutor.shutdown(); } @Override public List shutdownNow() { + if (mRpcTracker != null) { + long activeRpcCount = mRpcTracker.getCount(); + if (activeRpcCount > 0) { + LOG.warn("{} operations have not completed", activeRpcCount); + } + } return mExecutor.shutdownNow(); } @@ -107,45 +136,106 @@ public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedE @Override public Future submit(Callable task) { - return mExecutor.submit(task); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Callable)"); + } + try { + return 
mExecutor.submit(task); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public Future submit(Runnable task, T result) { - return mExecutor.submit(task, result); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Runnable,T)"); + } + try { + return mExecutor.submit(task, result); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public Future submit(Runnable task) { - return mExecutor.submit(task); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in submit(Runnable)"); + } + try { + return mExecutor.submit(task); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public List> invokeAll(Collection> tasks) throws InterruptedException { - return mExecutor.invokeAll(tasks); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in invokeAll(Collection)"); + } + try { + return mExecutor.invokeAll(tasks); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override public List> invokeAll(Collection> tasks, long timeout, TimeUnit unit) throws InterruptedException { - return mExecutor.invokeAll(tasks, timeout, unit); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in invokeAll(Collection,long,TimeUnit)"); + } + try { + return mExecutor.invokeAll(tasks, timeout, unit); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } @Override - public T invokeAny(Collection> tasks) - throws InterruptedException, ExecutionException { - return null; + public T invokeAny(Collection> tasks) { + // Not used. Also the active counter is hard, so we do not support it. 
+ throw new UnsupportedOperationException("invokeAny(Collection) is not supported"); } @Override - public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) - throws InterruptedException, ExecutionException, TimeoutException { - return mExecutor.invokeAny(tasks, timeout, unit); + public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) { + // Not used. Also the active counter is hard, so we do not support it. + throw new UnsupportedOperationException( + "invokeAny(Collection,long,TimeUnit) is not supported"); } @Override public void execute(Runnable command) { - mExecutor.execute(command); + if (mRpcTracker != null) { + mRpcTracker.inc(); + LOG.trace("Inc from rpc server in execute(Runnable)"); + } + try { + mExecutor.execute(command); + } finally { + if (mRpcTracker != null) { + mRpcTracker.dec(); + } + } } } diff --git a/core/server/master/pom.xml b/core/server/master/pom.xml index 99305b7c0e4e..84a81926050d 100644 --- a/core/server/master/pom.xml +++ b/core/server/master/pom.xml @@ -110,6 +110,11 @@ alluxio-job-client ${project.version} + + org.alluxio + alluxio-stress-shell + ${project.version} + diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index 3d29e9cd7f8e..061b3348a7a1 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -19,9 +19,11 @@ import alluxio.exception.status.UnavailableException; import alluxio.grpc.Command; import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetRegisterLeasePRequest; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.master.Master; @@ -123,17 +125,17 @@ List 
getWorkerReport(GetWorkerReportOptions options) List getWorkerLostStorage(); /** - * @param workerId the worker id + * @param address worker address to check * @return true if the worker is excluded, otherwise false */ - boolean isNotServing(long workerId); + boolean isRejected(WorkerNetAddress address); /** * Decommission a worker. * - * @param workerName the worker hostname of worker to be decommissioned + * @param requestOptions the request */ - void decommissionWorker(String workerName) throws NotFoundException; + void decommissionWorker(DecommissionWorkerPOptions requestOptions) throws NotFoundException; /** * Removes blocks from workers. @@ -392,12 +394,10 @@ void workerRegisterStream( long getJournaledNextContainerId(); /** - * Removes all associated metadata about the decommissioned worker from block master. - * - * The worker to free must have been decommissioned. - * @param workerId the workerId of target worker + * Revert disabling a worker, enabling it to register to the cluster. + * @param requestOptions the request */ - void removeDecommissionedWorker(long workerId) throws NotFoundException; + void removeDisabledWorker(RemoveDisabledWorkerPOptions requestOptions) throws NotFoundException; /** * Notify the worker id to a master. 
diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java index b2088bc56653..79acbf755115 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMasterClientServiceHandler.java @@ -13,7 +13,6 @@ import alluxio.RpcUtils; import alluxio.client.block.options.GetWorkerReportOptions; -import alluxio.exception.status.NotFoundException; import alluxio.grpc.BlockMasterClientServiceGrpc; import alluxio.grpc.BlockMasterInfo; import alluxio.grpc.BlockMasterInfoField; @@ -34,11 +33,8 @@ import alluxio.grpc.GetWorkerLostStoragePResponse; import alluxio.grpc.GetWorkerReportPOptions; import alluxio.grpc.GrpcUtils; -import alluxio.grpc.RemoveDecommissionedWorkerPOptions; -import alluxio.grpc.RemoveDecommissionedWorkerPResponse; -import alluxio.grpc.WorkerInfoField; -import alluxio.grpc.WorkerRange; -import alluxio.wire.WorkerInfo; +import alluxio.grpc.RemoveDisabledWorkerPOptions; +import alluxio.grpc.RemoveDisabledWorkerPResponse; import com.google.common.base.Preconditions; import io.grpc.stub.StreamObserver; @@ -46,7 +42,6 @@ import org.slf4j.LoggerFactory; import java.util.Arrays; -import java.util.List; import java.util.stream.Collectors; /** @@ -148,24 +143,13 @@ public void getWorkerInfoList(GetWorkerInfoListPOptions options, } @Override - public void removeDecommissionedWorker(RemoveDecommissionedWorkerPOptions options, - StreamObserver responseObserver) { + public void removeDisabledWorker(RemoveDisabledWorkerPOptions options, + StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - List decommissionedWorkers = mBlockMaster.getWorkerReport( - new GetWorkerReportOptions(GetWorkerReportPOptions.newBuilder() - .setWorkerRange(WorkerRange.DECOMMISSIONED) - .addFieldRanges(WorkerInfoField.ADDRESS) - 
.addFieldRanges(WorkerInfoField.ID) - .build())); - for (WorkerInfo worker : decommissionedWorkers) { - if (worker.getAddress().getHost().equals(options.getWorkerName())) { - mBlockMaster.removeDecommissionedWorker(worker.getId()); - return RemoveDecommissionedWorkerPResponse.getDefaultInstance(); - } - } - // Exception info has been added in FreeWorkerCommand. - throw new NotFoundException(options.getWorkerName()); - }, "RemoveDecommissionedWorker", "options=%s", responseObserver, options); + // This command is idempotent and is no-op if the address is not recognized + mBlockMaster.removeDisabledWorker(options); + return RemoveDisabledWorkerPResponse.getDefaultInstance(); + }, "RemoveDisabledWorker", "options=%s", responseObserver, options); } @Override @@ -188,11 +172,11 @@ public void getWorkerLostStorage(GetWorkerLostStoragePOptions options, } @Override - public void decommissionWorker(DecommissionWorkerPOptions request, + public void decommissionWorker(DecommissionWorkerPOptions options, StreamObserver responseObserver) { RpcUtils.call(LOG, () -> { - mBlockMaster.decommissionWorker(request.getWorkerName()); + mBlockMaster.decommissionWorker(options); return DecommissionWorkerPResponse.getDefaultInstance(); - }, "DecommissionWorker", "request=%s", responseObserver, request); + }, "DecommissionWorker", "request=%s", responseObserver, options); } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index b138b94c8f2b..73d0f6c83180 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -32,12 +32,14 @@ import alluxio.grpc.Command; import alluxio.grpc.CommandType; import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.GetRegisterLeasePRequest; import alluxio.grpc.GrpcService; 
import alluxio.grpc.GrpcUtils; import alluxio.grpc.NodeState; import alluxio.grpc.RegisterWorkerPOptions; import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RemoveDisabledWorkerPOptions; import alluxio.grpc.ServiceType; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; @@ -121,6 +123,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.Lock; import java.util.function.BiConsumer; @@ -175,6 +178,11 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockMaster.class); + private static final String WORKER_DISABLED = + "Worker with address %s is manually decommissioned and marked not able to join " + + "the cluster again. If you want this worker to register to the cluster again, " + + "use `bin/alluxio fsadmin enableWorker -h ` command."; + /** * Concurrency and locking in the BlockMaster * @@ -243,6 +251,7 @@ public class DefaultBlockMaster extends CoreMaster implements BlockMaster { /** Worker is not visualable until registration completes. */ private final IndexedSet mTempWorkers = new IndexedSet<>(ID_INDEX, ADDRESS_INDEX); + private final Set mRejectWorkers = new ConcurrentHashSet<>(); /** * Keeps track of workers which have been decommissioned. * For we need to distinguish the lost worker accidentally and the decommissioned worker manually. 
@@ -693,15 +702,29 @@ public List getLostWorkersInfoList() throws UnavailableException { } @Override - public void removeDecommissionedWorker(long workerId) throws NotFoundException { + public void removeDisabledWorker(RemoveDisabledWorkerPOptions requestOptions) + throws NotFoundException { if (mStandbyMasterRpcEnabled && mPrimarySelector.getStateUnsafe() == NodeState.STANDBY) { throw new UnavailableRuntimeException( - "RemoveDecommissionedWorker operation is not supported on standby masters"); + "RemoveDisabledWorker operation is not supported on standby masters"); + } + String workerHostName = requestOptions.getWorkerHostname(); + long workerWebPort = requestOptions.getWorkerWebPort(); + AtomicBoolean found = new AtomicBoolean(false); + mRejectWorkers.removeIf(entry -> { + if (entry.getHost().equals(workerHostName) && entry.getWebPort() == workerWebPort) { + LOG.info("Received admin command to re-accept worker {}. The worker should be " + + "accepted to the cluster when it registers again.", entry); + found.set(true); + return true; + } + return false; + }); + if (!found.get()) { + LOG.info("Received admin command to re-accept worker {} but the worker is " + + "not decommissioned. The worker will be able to register to the cluster normally. 
" + + "No further action is required.", workerHostName); } - MasterWorkerInfo worker = getWorker(workerId); - Preconditions.checkNotNull(mDecommissionedWorkers - .getFirstByField(ADDRESS_INDEX, worker.getWorkerAddress())); - processFreedWorker(worker); } @Override @@ -776,7 +799,8 @@ public List getWorkerReport(GetWorkerReportOptions options) } for (MasterWorkerInfo worker : selectedDecommissionedWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DECOMMISSIONED)); } return workerInfoList; } @@ -846,6 +870,9 @@ public void removeBlocks(Collection blockIds, boolean delete) throws Unava // with the block), the block will not be freed ever. The locking logic in // workerRegister should be changed to address this race condition. for (long workerId : workerIds) { + // No need to update if the worker is lost or decommissioned + // When that lost/decommissioned worker registers again, those removed blocks + // will not be recognized, and the master will instruct the worker to remove them anyway MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker != null) { try (LockResource r = worker.lockWorkerMeta( @@ -859,25 +886,68 @@ public void removeBlocks(Collection blockIds, boolean delete) throws Unava } @Override - public boolean isNotServing(long workerId) { - return mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId) != null; + public boolean isRejected(WorkerNetAddress address) { + return mRejectWorkers.contains(address); } @Override - public void decommissionWorker(String workerHostName) + public void decommissionWorker(DecommissionWorkerPOptions requestOptions) throws NotFoundException { + String workerHostName = requestOptions.getWorkerHostname(); + long workerWebPort = requestOptions.getWorkerWebPort(); + boolean canRegisterAgain = 
requestOptions.getCanRegisterAgain(); + LOG.info("Decommissioning worker {}:{}", requestOptions.getWorkerHostname(), + requestOptions.getWorkerWebPort()); for (MasterWorkerInfo workerInfo : mWorkers) { - if (workerHostName.equals(workerInfo.getWorkerAddress().getHost())) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Found worker to decommission {}", workerInfo.getWorkerAddress()); + try (LockResource r = workerInfo.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + processDecommissionedWorker(workerInfo, canRegisterAgain); + } + LOG.info("Worker {}@{}:{} has been added to the decommissionedWorkers set.", + workerInfo.getId(), workerHostName, workerWebPort); + return; + } + } + // The worker is not active, but it has been decommissioned from a previous call + for (MasterWorkerInfo workerInfo : mDecommissionedWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Worker {}@{}:{} has been decommissioned already", + workerInfo.getId(), workerHostName, workerWebPort); + return; + } + } + // If the worker is about to register, it may register back even if we decommission it + // here. So we let the admin wait until the worker is registered, to reduce the number of + // states to manage. 
+ for (MasterWorkerInfo workerInfo : mTempWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + throw new NotFoundException(ExceptionMessage.WORKER_DECOMMISSIONED_BEFORE_REGISTER + .getMessage(workerHostName + ":" + workerWebPort)); + } + } + // If the worker is lost, we guess it is more likely that the worker will not come back + // immediately + for (MasterWorkerInfo workerInfo : mLostWorkers) { + WorkerNetAddress address = workerInfo.getWorkerAddress(); + if (workerHostName.equals(address.getHost()) && workerWebPort == address.getWebPort()) { + LOG.info("Found worker to decommission {} from lost workers", + workerInfo.getWorkerAddress()); try (LockResource r = workerInfo.lockWorkerMeta( EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { - processDecommissionedWorker(workerInfo); + processDecommissionedWorker(workerInfo, canRegisterAgain); } - LOG.info("{} has been added to the decommissionedWorkers set.", - workerHostName); + LOG.info("A lost worker {}@{}:{} has been added to the decommissionedWorkers set.", + workerInfo.getId(), workerHostName, workerWebPort); return; } } - throw new NotFoundException("Worker {} not found in alive worker set"); + throw new NotFoundException(ExceptionMessage.WORKER_NOT_FOUND + .getMessage(workerHostName + ":" + workerWebPort)); } @Override @@ -1005,7 +1075,33 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); // TODO(peis): Check lost workers as well. if (worker == null) { - throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); + /* + * If the worker is not recognized: + * 1. [Probably] The worker has been decommissioned and removed from the active worker list + * 2. [Possible] The worker has not finished its register process. 
Maybe the master has + * failed over and the worker has not registered to this new primary. + * 3. [Unlikely] The worker does not belong to this cluster and has never registered. + * This is unlikely because the worker has an ID and it must be from some master. + * 4. [Unlikely] The worker is lost to the master. This is unlikely because the CommitBlock + * call is from the worker. This is more possibly the master is busy and did not + * handle the worker's heartbeat message for too long. + */ + worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + if (worker == null) { + throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); + } else { + WorkerNetAddress addr = worker.getWorkerAddress(); + LOG.info("Committing blocks from a decommissioned worker {}", + addr.getHost() + ":" + addr.getRpcPort()); + /* + * Even though the worker is now decommissioned, the master still accepts the block + * and updates the BlockLocation normally. + * Updating the BlockLocation is not strictly necessary, because when the worker + * registers again after restart, all locations will be rebuilt. + * But for simplicity, the location is still updated. + * A disabled worker is allowed to commit block, so ongoing operations will succeed. 
+ */ + } } try (JournalContext journalContext = createJournalContext()) { @@ -1193,6 +1289,11 @@ public long getWorkerId(WorkerNetAddress workerNetAddress) { throw new UnavailableRuntimeException( "GetWorkerId operation is not supported on standby masters"); } + if (isRejected(workerNetAddress)) { + String msg = String.format(WORKER_DISABLED, workerNetAddress); + LOG.warn("{}", msg); + throw new UnavailableRuntimeException(msg); + } LOG.info("Worker {} requesting for an ID", workerNetAddress); MasterWorkerInfo existingWorker = mWorkers.getFirstByField(ADDRESS_INDEX, workerNetAddress); if (existingWorker != null) { @@ -1259,11 +1360,6 @@ public void workerRegister(long workerId, List storageTiers, Map> currentBlocksOnLocation, Map lostStorage, RegisterWorkerPOptions options) throws NotFoundException { - - if (isNotServing(workerId)) { - return; - } - MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { @@ -1273,6 +1369,10 @@ public void workerRegister(long workerId, List storageTiers, if (worker == null) { throw new NotFoundException(ExceptionMessage.NO_WORKER_FOUND.getMessage(workerId)); } + if (isRejected(worker.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + worker.getWorkerAddress())); + } worker.setBuildVersion(options.getBuildVersion()); @@ -1330,6 +1430,15 @@ public MasterWorkerInfo getWorker(long workerId) throws NotFoundException { return worker; } + private MasterWorkerInfo getLiveOrDecommissionedWorker(long workerId) { + MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); + if (worker != null) { + return worker; + } + // If not found in the decommissioned worker, this returns null + return mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + } + private void processDecommissionedWorkerBlocks(MasterWorkerInfo workerInfo) { processWorkerRemovedBlocks(workerInfo, workerInfo.getBlocks(), false); } @@ -1338,9 +1447,24 @@ private void 
processDecommissionedWorkerBlocks(MasterWorkerInfo workerInfo) { * Updates the metadata for the specified decommissioned worker. * @param worker the master worker info */ - private void processDecommissionedWorker(MasterWorkerInfo worker) { + private void processDecommissionedWorker(MasterWorkerInfo worker, boolean canRegisterAgain) { + WorkerNetAddress address = worker.getWorkerAddress(); + if (canRegisterAgain) { + LOG.info("Worker with address {} is decommissioned but will be accepted when it " + + "registers again.", address); + } else { + LOG.info("Worker with address {} will be rejected on register/heartbeat", address); + mRejectWorkers.add(address); + } + mDecommissionedWorkers.add(worker); + // Remove worker from all other possible states mWorkers.remove(worker); + mTempWorkers.remove(worker); + mLostWorkers.remove(worker); + // Invalidate cache to trigger new build of worker info list + mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY); + WorkerNetAddress workerNetAddress = worker.getWorkerAddress(); // TODO(bzheng888): Maybe need a new listener such as WorkerDecommissionListener. for (Consumer
function : mWorkerLostListeners) { @@ -1352,10 +1476,6 @@ private void processDecommissionedWorker(MasterWorkerInfo worker) { @Override public void workerRegisterStream(WorkerRegisterContext context, RegisterWorkerPRequest chunk, boolean isFirstMsg) { - if (isNotServing(context.getWorkerId())) { - // Stop register the excluded worker - return; - } // TODO(jiacheng): find a place to check the lease if (isFirstMsg) { workerRegisterStart(context, chunk); @@ -1369,7 +1489,10 @@ protected void workerRegisterStart(WorkerRegisterContext context, MasterWorkerInfo workerInfo = context.getWorkerInfo(); Preconditions.checkState(workerInfo != null, "No workerInfo metadata found in the WorkerRegisterContext!"); - + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } final List storageTiers = chunk.getStorageTiersList(); final Map totalBytesOnTiers = chunk.getTotalBytesOnTiersMap(); final Map usedBytesOnTiers = chunk.getUsedBytesOnTiersMap(); @@ -1409,7 +1532,10 @@ protected void workerRegisterBatch(WorkerRegisterContext context, RegisterWorker MasterWorkerInfo workerInfo = context.getWorkerInfo(); Preconditions.checkState(workerInfo != null, "No workerInfo metadata found in the WorkerRegisterContext!"); - + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } // Even if we add the BlockLocation before the workerInfo is fully registered, // it should be fine because the block can be read on this workerInfo. 
// If the stream fails in the middle, the blocks recorded on the MasterWorkerInfo @@ -1427,7 +1553,10 @@ public void workerRegisterFinish(WorkerRegisterContext context) { MasterWorkerInfo workerInfo = context.getWorkerInfo(); Preconditions.checkState(workerInfo != null, "No workerInfo metadata found in the WorkerRegisterContext!"); - + if (isRejected(workerInfo.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + workerInfo.getWorkerAddress())); + } // Detect any lost blocks on this workerInfo. Set removedBlocks; if (workerInfo.mIsRegistered) { @@ -1464,14 +1593,32 @@ public Command workerHeartbeat(long workerId, Map capacityBytesOnT Map> addedBlocks, Map lostStorage, List metrics) { - if (isNotServing(workerId)) { - return Command.newBuilder().setCommandType(CommandType.Nothing).build(); - } MasterWorkerInfo worker = mWorkers.getFirstByField(ID_INDEX, workerId); if (worker == null) { + /* + * If the worker is not recognized: + * 1. The worker never registered to the cluster, or the master has restarted/failover + * 2. 
The worker has been decommissioned and removed from the active worker list + */ + worker = mDecommissionedWorkers.getFirstByField(ID_INDEX, workerId); + if (worker != null) { + WorkerNetAddress workerAddr = worker.getWorkerAddress(); + if (isRejected(worker.getWorkerAddress())) { + LOG.info("Received heartbeat from a disabled worker {}", + workerAddr.getHost() + ":" + workerAddr.getRpcPort()); + return Command.newBuilder().setCommandType(CommandType.Disabled).build(); + } + LOG.info("Received heartbeat from a decommissioned worker {}", + workerAddr.getHost() + ":" + workerAddr.getRpcPort()); + return Command.newBuilder().setCommandType(CommandType.Decommissioned).build(); + } LOG.warn("Could not find worker id: {} for heartbeat.", workerId); return Command.newBuilder().setCommandType(CommandType.Register).build(); } + if (isRejected(worker.getWorkerAddress())) { + throw new UnavailableRuntimeException(String.format(WORKER_DISABLED, + worker.getWorkerAddress())); + } // Update the TS before the heartbeat so even if the worker heartbeat processing // is time-consuming or triggers GC, the worker does not get marked as lost @@ -1730,6 +1877,9 @@ private Optional generateBlockInfo(long blockId) throws UnavailableEx List locations = new ArrayList<>(blockLocations.size()); for (BlockLocation location : blockLocations) { + // Decommissioned workers are not included in the available locations + // Note that this may introduce a short unavailability on the block, before + // this worker registers again (and wipes out the decommissioned state). 
MasterWorkerInfo workerInfo = mWorkers.getFirstByField(ID_INDEX, location.getWorkerId()); if (workerInfo != null) { @@ -1787,7 +1937,19 @@ public void heartbeat(long timeLimitMs) { EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { - LOG.error("The worker {}({}) timed out after {}ms without a heartbeat! " + LOG.error("The lost worker {}({}) timed out after {}ms without a heartbeat! " + + "Master will forget about this worker.", worker.getId(), + worker.getWorkerAddress(), lastUpdate); + deleteWorkerMetadata(worker); + } + } + } + for (MasterWorkerInfo worker : mDecommissionedWorkers) { + try (LockResource r = worker.lockWorkerMeta( + EnumSet.of(WorkerMetaLockSection.BLOCKS), false)) { + final long lastUpdate = mClock.millis() - worker.getLastUpdatedTimeMs(); + if ((lastUpdate - masterWorkerTimeoutMs) > masterWorkerDeleteTimeoutMs) { + LOG.error("The decommissioned worker {}({}) timed out after {}ms without a heartbeat! 
" + "Master will forget about this worker.", worker.getId(), worker.getWorkerAddress(), lastUpdate); deleteWorkerMetadata(worker); @@ -1827,6 +1989,8 @@ public void forgetAllWorkers() { private void processLostWorker(MasterWorkerInfo worker) { mLostWorkers.add(worker); mWorkers.remove(worker); + // Invalidate cache to trigger new build of worker info list + mWorkerInfoCache.invalidate(WORKER_INFO_CACHE_KEY); // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever mTempWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); @@ -1845,16 +2009,13 @@ private void deleteWorkerMetadata(MasterWorkerInfo worker) { mLostWorkers.remove(worker); // If a worker is gone before registering, avoid it getting stuck in mTempWorker forever mTempWorkers.remove(worker); + mDecommissionedWorkers.remove(worker); WorkerNetAddress workerAddress = worker.getWorkerAddress(); for (Consumer
function : mWorkerDeleteListeners) { function.accept(new Address(workerAddress.getHost(), workerAddress.getRpcPort())); } } - private void processFreedWorker(MasterWorkerInfo worker) { - mDecommissionedWorkers.remove(worker); - } - LockResource lockBlock(long blockId) { return new LockResource(mBlockLocks.get(blockId)); } diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java index c675588dae9a..71a200c1010f 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java @@ -11,19 +11,28 @@ package alluxio.master.block; +import static alluxio.stress.rpc.TierAlias.MEM; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import alluxio.Constants; +import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.clock.ManualClock; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.exception.BlockInfoException; +import alluxio.exception.ExceptionMessage; import alluxio.exception.status.NotFoundException; import alluxio.grpc.BuildVersion; import alluxio.grpc.Command; import alluxio.grpc.CommandType; +import alluxio.grpc.ConfigProperty; +import alluxio.grpc.DecommissionWorkerPOptions; import alluxio.grpc.RegisterWorkerPOptions; +import alluxio.grpc.RegisterWorkerPRequest; +import alluxio.grpc.RegisterWorkerPResponse; import alluxio.grpc.StorageList; import alluxio.grpc.WorkerLostStorageInfo; import alluxio.heartbeat.HeartbeatContext; @@ -33,6 +42,8 @@ import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.block.meta.MasterWorkerInfo; +import alluxio.master.block.meta.WorkerState; import 
alluxio.master.journal.JournalSystem; import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; @@ -45,11 +56,14 @@ import alluxio.wire.BlockLocation; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; +import alluxio.worker.block.BlockStoreLocation; +import alluxio.worker.block.RegisterStreamer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import io.grpc.stub.StreamObserver; import org.junit.After; import org.junit.Before; import org.junit.ClassRule; @@ -62,6 +76,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -73,6 +89,7 @@ * Unit tests for {@link BlockMaster}. */ public class BlockMasterTest { + public static final long CAPACITY = 20L * 1024 * 1024 * 1024; // 20GB private static final WorkerNetAddress NET_ADDRESS_1 = new WorkerNetAddress().setHost("localhost") .setRpcPort(80).setDataPort(81).setWebPort(82); private static final WorkerNetAddress NET_ADDRESS_2 = new WorkerNetAddress().setHost("localhost") @@ -82,6 +99,15 @@ public class BlockMasterTest { private static final Map> NO_BLOCKS_ON_LOCATION = ImmutableMap.of(); private static final Map NO_LOST_STORAGE = ImmutableMap.of(); + public static final Map> LOST_STORAGE = + ImmutableMap.of(MEM.toString(), ImmutableList.of()); + public static final List EMPTY_CONFIG = ImmutableList.of(); + public static final int BATCH_SIZE = 1000; + + public static final BuildVersion OLD_VERSION = BuildVersion.newBuilder().setVersion("1.0.0") + .setRevision("foobar").build(); + public static final BuildVersion NEW_VERSION = BuildVersion.newBuilder().setVersion("1.1.0") + .setRevision("foobaz").build(); 
private BlockMaster mBlockMaster; private MasterRegistry mRegistry; @@ -198,7 +224,7 @@ public void countBytes() throws Exception { } @Test - public void detectLostWorkers() throws Exception { + public void detectLostWorker() throws Exception { // Register a worker. long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); mBlockMaster.workerRegister(worker1, @@ -233,13 +259,549 @@ public void decommissionWorker() throws Exception { RegisterWorkerPOptions.getDefaultInstance()); // Decommission worker - mBlockMaster.decommissionWorker(NET_ADDRESS_1.getHost()); + DecommissionWorkerPOptions options = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .build(); + mBlockMaster.decommissionWorker(options); // Make sure the worker is decommissioned. int decommissionedCount = mBlockMaster.getDecommissionedWorkerCount(); int liveCount = mBlockMaster.getWorkerCount(); + int lostCount = mBlockMaster.getLostWorkerCount(); assertEquals(1, decommissionedCount); assertEquals(0, liveCount); + assertEquals(0, lostCount); + } + + @Test + public void decommissionLostWorker() throws Exception { + // Register a worker. + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + mBlockMaster.workerRegister(worker1, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 10L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + RegisterWorkerPOptions.getDefaultInstance()); + + // Advance the block master's clock by an hour so that worker appears lost. + mClock.setTimeMs(System.currentTimeMillis() + Constants.HOUR_MS); + + // Run the lost worker detector. + HeartbeatScheduler.execute(HeartbeatContext.MASTER_LOST_WORKER_DETECTION); + + // Make sure the worker is detected as lost. 
+ List info = mBlockMaster.getLostWorkersInfoList(); + assertEquals(worker1, Iterables.getOnlyElement(info).getId()); + + // Decommission worker + DecommissionWorkerPOptions options = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .build(); + mBlockMaster.decommissionWorker(options); + + // Make sure the worker is decommissioned. + int decommissionedCount = mBlockMaster.getDecommissionedWorkerCount(); + int liveCount = mBlockMaster.getWorkerCount(); + int lostCount = mBlockMaster.getLostWorkerCount(); + assertEquals(1, decommissionedCount); + assertEquals(0, liveCount); + assertEquals(0, lostCount); + } + + @Test + public void decommissionCommitUpgradeRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(OLD_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 0L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + options); + List liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, 
liveWorkersAfterDecom.size()); + List allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After decommissioned, the worker can still heartbeat to the master + Map memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // The leftover operations on the worker can still commit blocks to the master + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + // The block can be found on the master + BlockInfo blockInfo = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfo); + assertEquals(blockInfo.getLength(), blockLength); + // Although the block can successfully commit, the available locations do not include + // the decommissioned worker, so clients will not read from that worker for that block + assertEquals(0, blockInfo.getLocations().size()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + Map memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + 
assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + RegisterWorkerPOptions upgradedWorker = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(NEW_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + memUsageWithBlock, + memUsageWithBlock, + ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, + upgradedWorker); + List liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + assertEquals(1, upgradedWorkerInfo.getBlockCount()); + BlockInfo blockInfoCheckAgain = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfoCheckAgain); + assertEquals(blockInfoCheckAgain.getLength(), blockLength); + // The block can be found on the decommissioned worker once the worker registers + // again after the upgrade + assertEquals(1, blockInfoCheckAgain.getLocations().size()); + BlockLocation locCheckAgain = blockInfoCheckAgain.getLocations().get(0); + assertEquals(workerId, locCheckAgain.getWorkerId()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Nothing, heartBeatAfterUpgrade.getCommandType()); + } + + @Test + public void 
decommissionCommitUpgradeStreamRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + BlockMasterWorkerServiceHandler handler = new BlockMasterWorkerServiceHandler(mBlockMaster); + Queue errors = + streamRegisterWorkerWithVersion(handler, workerId, 0L, ImmutableList.of(), OLD_VERSION); + assertEquals(0, errors.size()); + + List liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After decommissioned, the worker can still heartbeat to the master + Map memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // The leftover 
operations on the worker can still commit blocks to the master + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + // The block can be found on the master + BlockInfo blockInfo = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfo); + assertEquals(blockInfo.getLength(), blockLength); + // Although the block can successfully commit, the available locations do not include + // the decommissioned worker, so clients will not read from that worker for that block + assertEquals(0, blockInfo.getLocations().size()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + Map memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + errors = streamRegisterWorkerWithVersion(handler, workerId, blockLength, + ImmutableList.of(blockId), NEW_VERSION); + assertEquals(0, errors.size()); + List liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo 
upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + assertEquals(1, upgradedWorkerInfo.getBlockCount()); + BlockInfo blockInfoCheckAgain = mBlockMaster.getBlockInfo(blockId); + assertNotNull(blockInfoCheckAgain); + assertEquals(blockInfoCheckAgain.getLength(), blockLength); + // The block can be found on the decommissioned worker once the worker registers + // again after the upgrade + assertEquals(1, blockInfoCheckAgain.getLocations().size()); + BlockLocation locCheckAgain = blockInfoCheckAgain.getLocations().get(0); + assertEquals(workerId, locCheckAgain.getWorkerId()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Nothing, heartBeatAfterUpgrade.getCommandType()); + } + + @Test + public void decommissionRemoveUpgradeStreamRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + BlockMasterWorkerServiceHandler handler = new BlockMasterWorkerServiceHandler(mBlockMaster); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + Queue errors = streamRegisterWorkerWithVersion(handler, workerId, 0L, + ImmutableList.of(), OLD_VERSION); + assertEquals(0, errors.size()); + List liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Prepare a block for removal + long blockId = 1L; + long blockLength = 100L; + 
mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After decommissioned, the worker can still heartbeat to the master + Map memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // Remove the block from the master and workers + mBlockMaster.removeBlocks(ImmutableList.of(blockId), true); + Exception e = assertThrows(BlockInfoException.class, () -> { + BlockInfo shouldNotExist = mBlockMaster.getBlockInfo(blockId); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.BLOCK_META_NOT_FOUND.getMessage(blockId))); + + // Heartbeat to the master again, the master does nothing about the block + Map memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = 
mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + errors = streamRegisterWorkerWithVersion(handler, workerId, blockLength, + ImmutableList.of(blockId), NEW_VERSION); + assertEquals(0, errors.size()); + List liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + // The block should not be recognized and therefore the master will want to remove that block + assertEquals(0, upgradedWorkerInfo.getBlockCount()); + assertEquals(1, upgradedWorkerInfo.getToRemoveBlockCount()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Free, heartBeatAfterUpgrade.getCommandType()); + assertEquals(ImmutableList.of(blockId), heartBeatAfterUpgrade.getDataList()); + } + + @Test + public void decommissionRemoveUpgradeRegister() throws Exception { + long workerId = mBlockMaster.getWorkerId(NET_ADDRESS_1); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + RegisterWorkerPOptions options = 
RegisterWorkerPOptions.newBuilder() + .setBuildVersion(OLD_VERSION).build(); + + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + ImmutableMap.of(Constants.MEDIUM_MEM, 100L), + ImmutableMap.of(Constants.MEDIUM_MEM, 0L), + NO_BLOCKS_ON_LOCATION, + NO_LOST_STORAGE, + options); + List liveWorkerInfo = mBlockMaster.getWorkerInfoList(); + List allWorkerInfo = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerInfo.size()); + assertEquals(1, allWorkerInfo.size()); + WorkerInfo w = liveWorkerInfo.get(0); + assertEquals(WorkerState.LIVE.toString(), w.getState()); + assertEquals(OLD_VERSION.getVersion(), w.getVersion()); + assertEquals(OLD_VERSION.getRevision(), w.getRevision()); + + // Prepare a block for removal + long blockId = 1L; + long blockLength = 100L; + mBlockMaster.commitBlock(workerId, blockLength, "MEM", "MEM", blockId, blockLength); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + mBlockMaster.decommissionWorker(decomReq); + List liveWorkersAfterDecom = mBlockMaster.getWorkerInfoList(); + assertEquals(0, liveWorkersAfterDecom.size()); + List allWorkersAfterDecom = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, allWorkersAfterDecom.size()); + WorkerInfo decomWorker = allWorkersAfterDecom.get(0); + assertEquals(WorkerState.DECOMMISSIONED.toString(), decomWorker.getState()); + assertEquals(OLD_VERSION.getVersion(), decomWorker.getVersion()); + assertEquals(OLD_VERSION.getRevision(), decomWorker.getRevision()); + + // After decommissioned, the worker can still heartbeat to the master + Map memUsage = ImmutableMap.of(Constants.MEDIUM_MEM, 0L); + alluxio.grpc.Command heartBeat = mBlockMaster.workerHeartbeat(workerId, null, memUsage, + NO_BLOCKS, NO_BLOCKS_ON_LOCATION, 
NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeat.getCommandType()); + + // Remove the block from the master and workers + mBlockMaster.removeBlocks(ImmutableList.of(blockId), true); + Exception e = assertThrows(BlockInfoException.class, () -> { + BlockInfo shouldNotExist = mBlockMaster.getBlockInfo(blockId); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.BLOCK_META_NOT_FOUND.getMessage(blockId))); + + // Heartbeat to the master again, the master does nothing about the block + Map memUsageWithBlock = ImmutableMap.of(Constants.MEDIUM_MEM, blockLength); + List memBlockList = ImmutableList.of(blockId); + Block.BlockLocation memTier = Block.BlockLocation.newBuilder() + .setTier("MEM").setMediumType("MEM").setWorkerId(workerId).build(); + alluxio.grpc.Command heartBeatAgain = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Decommissioned, heartBeatAgain.getCommandType()); + + // The worker registers again with a higher version + RegisterWorkerPOptions upgradedWorker = RegisterWorkerPOptions.newBuilder() + .setBuildVersion(NEW_VERSION).build(); + mBlockMaster.workerRegister(workerId, + ImmutableList.of(Constants.MEDIUM_MEM), + memUsageWithBlock, + memUsageWithBlock, + ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, + upgradedWorker); + List liveWorkerAfterRestart = mBlockMaster.getWorkerInfoList(); + List allWorkerAfterRestart = + mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(1, liveWorkerAfterRestart.size()); + assertEquals(1, allWorkerAfterRestart.size()); + WorkerInfo restartedWorker = liveWorkerAfterRestart.get(0); + assertEquals(WorkerState.LIVE.toString(), restartedWorker.getState()); + assertEquals(NEW_VERSION.getVersion(), restartedWorker.getVersion()); + assertEquals(NEW_VERSION.getRevision(), restartedWorker.getRevision()); + MasterWorkerInfo 
upgradedWorkerInfo = mBlockMaster.getWorker(workerId); + // The block should not be recognized and therefore the master will want to remove that block + assertEquals(0, upgradedWorkerInfo.getBlockCount()); + assertEquals(1, upgradedWorkerInfo.getToRemoveBlockCount()); + + // Heartbeat to the master again, the master does not remove the block incorrectly + alluxio.grpc.Command heartBeatAfterUpgrade = mBlockMaster.workerHeartbeat(workerId, null, + memUsageWithBlock, memBlockList, ImmutableMap.of(memTier, memBlockList), + NO_LOST_STORAGE, mMetrics); + assertEquals(CommandType.Free, heartBeatAfterUpgrade.getCommandType()); + assertEquals(ImmutableList.of(blockId), heartBeatAfterUpgrade.getDataList()); + } + + public static Queue streamRegisterWorkerWithVersion( + BlockMasterWorkerServiceHandler handler, + long workerId, long blockSize, List blockList, BuildVersion version) { + List requests = generateRegisterStreamForWorkerWithVersion( + workerId, blockSize, blockList, version); + Queue errorQueue = new ConcurrentLinkedQueue<>(); + sendStreamToMaster(handler, requests, getErrorCapturingResponseObserver(errorQueue)); + return errorQueue; + } + + public static List generateRegisterStreamForWorkerWithVersion( + long workerId, long blockSize, List blockList, BuildVersion version) { + Map> blockMap = new HashMap<>(); + BlockStoreLocation mem = new BlockStoreLocation("MEM", 0, "MEM"); + blockMap.put(mem, blockList); + + // We just use the RegisterStreamer to generate the batch of requests + RegisterStreamer registerStreamer = new RegisterStreamer(null, + workerId, ImmutableList.of("MEM"), + ImmutableMap.of("MEM", CAPACITY), // capacity + ImmutableMap.of("MEM", blockSize * blockList.size()), // usage + blockMap, LOST_STORAGE, EMPTY_CONFIG, version); + + // Get chunks from the RegisterStreamer + return ImmutableList.copyOf(registerStreamer); + } + + public static StreamObserver getErrorCapturingResponseObserver( + Queue errorQueue) { + return new StreamObserver() { + @Override 
+ public void onNext(RegisterWorkerPResponse response) {} + + @Override + public void onError(Throwable t) { + errorQueue.offer(t); + } + + @Override + public void onCompleted() {} + }; + } + + public static void sendStreamToMaster(BlockMasterWorkerServiceHandler handler, + List requestChunks, + StreamObserver responseObserver) { + StreamObserver requestObserver = + handler.registerWorkerStream(responseObserver); + for (RegisterWorkerPRequest chunk : requestChunks) { + requestObserver.onNext(chunk); + } + requestObserver.onCompleted(); + } + + @Test + public void streamRegDecommissionUpgradeStreamReg() throws Exception { + long worker1 = mBlockMaster.getWorkerId(NET_ADDRESS_1); + + // Sequence to simulate worker upgrade and downgrade, + // with or without buildVersion in registerWorkerPOptions + BuildVersion oldVersion = BuildVersion.newBuilder().setVersion("1.0.0") + .setRevision("abc").build(); + BuildVersion newVersion = BuildVersion.newBuilder().setVersion("1.1.0") + .setRevision("def").build(); + + BlockMasterWorkerServiceHandler handler = new BlockMasterWorkerServiceHandler(mBlockMaster); + Queue errors = streamRegisterWorkerWithVersion(handler, worker1, 64 * Constants.MB, + ImmutableList.of(), oldVersion); + assertEquals(0, errors.size()); + + List availableWorkerList = mBlockMaster.getWorkerInfoList(); + assertEquals(1, availableWorkerList.size()); + assertEquals(1, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(0, mBlockMaster.getDecommissionedWorkerCount()); + assertEquals(oldVersion.getVersion(), availableWorkerList.get(0).getVersion()); + assertEquals(oldVersion.getRevision(), availableWorkerList.get(0).getRevision()); + + // Decommission the worker + DecommissionWorkerPOptions decomReq = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(NET_ADDRESS_1.getHost()).setWorkerWebPort(NET_ADDRESS_1.getWebPort()) + .setCanRegisterAgain(true) + .build(); + 
mBlockMaster.decommissionWorker(decomReq); + assertEquals(0, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(1, mBlockMaster.getDecommissionedWorkerCount()); + List workerReport = mBlockMaster.getWorkerReport(createGetWorkerReportOptions()); + assertEquals(oldVersion.getVersion(), workerReport.get(0).getVersion()); + assertEquals(oldVersion.getRevision(), workerReport.get(0).getRevision()); + + // Worker is restarted with a newer version + errors = streamRegisterWorkerWithVersion(handler, worker1, 64 * Constants.MB, + ImmutableList.of(), newVersion); + assertEquals(0, errors.size()); + assertEquals(1, mBlockMaster.getWorkerCount()); + assertEquals(0, mBlockMaster.getLostWorkerCount()); + assertEquals(0, mBlockMaster.getDecommissionedWorkerCount()); + List availableWorkerListNow = mBlockMaster.getWorkerInfoList(); + assertEquals(newVersion.getVersion(), availableWorkerListNow.get(0).getVersion()); + assertEquals(newVersion.getRevision(), availableWorkerListNow.get(0).getRevision()); + } + + private GetWorkerReportOptions createGetWorkerReportOptions() { + GetWorkerReportOptions getReportOptions = GetWorkerReportOptions.defaults(); + getReportOptions.setFieldRange(GetWorkerReportOptions.WorkerInfoField.ALL); + getReportOptions.setWorkerRange(GetWorkerReportOptions.WorkerRange.ALL); + return getReportOptions; } @Test diff --git a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java index cda6c75f2ba3..f5dc57f26d47 100644 --- a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java @@ -48,6 +48,7 @@ import alluxio.wire.WorkerWebUIInit; import alluxio.wire.WorkerWebUILogs; import alluxio.wire.WorkerWebUIMetrics; +import alluxio.wire.WorkerWebUIOperations; import 
alluxio.wire.WorkerWebUIOverview; import alluxio.worker.block.BlockStoreMeta; import alluxio.worker.block.BlockWorker; @@ -107,6 +108,7 @@ public final class AlluxioWorkerRestServiceHandler { // endpoints public static final String GET_INFO = "info"; + public static final String GET_OPERATIONS = "operations"; // webui endpoints // TODO(william): DRY up these enpoints public static final String WEBUI_INIT = "webui_init"; @@ -170,6 +172,56 @@ public Response getInfo(@QueryParam(QUERY_RAW_CONFIGURATION) final Boolean rawCo }, Configuration.global()); } + /** + * Gets the current active operations count in the worker. + * + * @return the response + */ + @GET + @Path(GET_OPERATIONS) + public Response getActiveOperations() { + return RestUtils.call(() -> { + WorkerWebUIOperations response = new WorkerWebUIOperations(); + /* + * This contains running operations in: + * 1. Worker RPC thread pool, for ongoing RPCs + * 2. GrpcExecutors.BLOCK_READER_EXECUTOR, for block readers + * 3. GrpcExecutors.BLOCK_READER_SERIALIZED_RUNNER_EXECUTOR, for replying to the client + * 4. GrpcExecutors.BLOCK_WRITER_EXECUTOR, for block writers + * + * So this is the number of operations actively running in the thread pools. + * In other to know the total accepted but not finished request, we need to consider the + * thread pool task queues. + */ + long operations = MetricsSystem.counter( + MetricKey.WORKER_ACTIVE_OPERATIONS.getName()).getCount(); + /* + * Only the RPC thread pool can have a meaningful length. The other block reader/writer + * thread pools all have 0/1 queue length and create threads immediately when there is + * a request. So we only need to consider the RPC pool queue length for idleness. 
+ */ + String workerRpcPoolSizeGaugeName = MetricKey.WORKER_RPC_QUEUE_LENGTH.getName(); + long rpcQueueSize = getGaugeValue(workerRpcPoolSizeGaugeName); + response.setOperationCount(operations) + .setRpcQueueLength(rpcQueueSize); + LOG.debug("Checking worker activity: {}", response); + return response; + }, Configuration.global()); + } + + // Cast to long to safely handle all gauges + private static long getGaugeValue(String gaugeName) { + try { + Gauge gauge = MetricsSystem.METRIC_REGISTRY.gauge(gaugeName, null); + // Carefully cast here because Integer cannot be cast to Long directly + return ((Number) gauge.getValue()).longValue(); + } catch (Exception e) { + LOG.error("Incorrect gauge name {}. Available names are: {}", + gaugeName, MetricsSystem.METRIC_REGISTRY.getGauges().keySet(), e); + return 0; + } + } + /** * Gets Web UI initialization data. * diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java index 3ac632238cc2..e9572f7b448f 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSync.java @@ -11,6 +11,7 @@ package alluxio.worker.block; +import alluxio.Constants; import alluxio.ProcessUtils; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -18,6 +19,7 @@ import alluxio.exception.FailedToAcquireRegisterLeaseException; import alluxio.grpc.Command; import alluxio.heartbeat.HeartbeatExecutor; +import alluxio.util.logging.SamplingLogger; import alluxio.wire.WorkerNetAddress; import org.slf4j.Logger; @@ -43,6 +45,7 @@ @NotThreadSafe public final class BlockMasterSync implements HeartbeatExecutor { private static final Logger LOG = LoggerFactory.getLogger(BlockMasterSync.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 30L * Constants.SECOND); private static final long ACQUIRE_LEASE_WAIT_MAX_DURATION 
= Configuration.getMs(PropertyKey.WORKER_REGISTER_LEASE_RETRY_MAX_DURATION); private static final int HEARTBEAT_TIMEOUT_MS = @@ -177,6 +180,9 @@ private void handleMasterCommand(Command cmd) throws IOException, ConnectionFail case Unknown: LOG.error("Master heartbeat sends unknown command {}", cmd); break; + case Decommissioned: + SAMPLING_LOG.info("This worker has been decommissioned"); + break; default: throw new RuntimeException("Un-recognized command from master " + cmd); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index fcba47cedbe6..23de3f7fd05f 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -520,6 +520,8 @@ public void cleanupSession(long sessionId) { public static final class Metrics { public static final Counter WORKER_ACTIVE_CLIENTS = MetricsSystem.counter(MetricKey.WORKER_ACTIVE_CLIENTS.getName()); + public static final Counter WORKER_ACTIVE_OPERATIONS = + MetricsSystem.counter(MetricKey.WORKER_ACTIVE_OPERATIONS.getName()); /** * Registers metric gauges. 
diff --git a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java index 69ea65239817..cdeaea5250f9 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java @@ -99,7 +99,35 @@ public RegisterStreamer( final Map> lostStorage, final List configList) { this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, - lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation)); + lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation), + BuildVersion.newBuilder() + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION).build()); + } + + /** + * Constructor. + * + * @param asyncClient the grpc client + * @param workerId the worker ID + * @param storageTierAliases storage/tier setup from the configuration + * @param totalBytesOnTiers the capacity of each tier + * @param usedBytesOnTiers the current usage of each tier + * @param currentBlocksOnLocation the blocks in each tier/dir + * @param lostStorage the lost storage paths + * @param configList the configuration properties + * @param version the version info + */ + @VisibleForTesting + public RegisterStreamer( + final BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, + final long workerId, final List storageTierAliases, + final Map totalBytesOnTiers, final Map usedBytesOnTiers, + final Map> currentBlocksOnLocation, + final Map> lostStorage, + final List configList, BuildVersion version) { + this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, + lostStorage, configList, new BlockMapIterator(currentBlocksOnLocation), version); } /** @@ -115,22 +143,45 @@ public RegisterStreamer( * @param blockListIterator an iterator used to iterate the blocks */ public RegisterStreamer( + final 
BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, + final long workerId, final List storageTierAliases, + final Map totalBytesOnTiers, final Map usedBytesOnTiers, + final Map> lostStorage, + final List configList, + BlockMapIterator blockListIterator) { + this(asyncClient, workerId, storageTierAliases, totalBytesOnTiers, usedBytesOnTiers, + lostStorage, configList, blockListIterator, + BuildVersion.newBuilder() + .setVersion(ProjectConstants.VERSION) + .setRevision(ProjectConstants.REVISION).build()); + } + + /** + * Constructor. + * + * @param asyncClient the grpc client + * @param workerId the worker ID + * @param storageTierAliases storage/tier setup from the configuration + * @param totalBytesOnTiers the capacity of each tier + * @param usedBytesOnTiers the current usage of each tier + * @param lostStorage the lost storage paths + * @param configList the configuration properties + * @param blockListIterator an iterator used to iterate the blocks + */ + private RegisterStreamer( final BlockMasterWorkerServiceGrpc.BlockMasterWorkerServiceStub asyncClient, final long workerId, final List storageTierAliases, final Map totalBytesOnTiers, final Map usedBytesOnTiers, final Map> lostStorage, final List configList, - BlockMapIterator blockListIterator) { + BlockMapIterator blockListIterator, + BuildVersion buildVersion) { mAsyncClient = asyncClient; mWorkerId = workerId; mStorageTierAliases = storageTierAliases; mTotalBytesOnTiers = totalBytesOnTiers; mUsedBytesOnTiers = usedBytesOnTiers; - final BuildVersion buildVersion = BuildVersion.newBuilder() - .setVersion(ProjectConstants.VERSION) - .setRevision(ProjectConstants.REVISION) - .build(); mOptions = RegisterWorkerPOptions.newBuilder().addAllConfigs(configList) .setBuildVersion(buildVersion).build(); mLostStorageMap = lostStorage.entrySet().stream() diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java 
b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java index 53fe2cd4aa88..d92823619986 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcDataServer.java @@ -28,7 +28,9 @@ import alluxio.util.network.NettyUtils; import alluxio.worker.DataServer; import alluxio.worker.WorkerProcess; +import alluxio.worker.block.DefaultBlockWorker; +import com.codahale.metrics.Counter; import io.netty.buffer.PooledByteBufAllocator; import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; @@ -122,8 +124,9 @@ public GrpcDataServer(final String hostName, final SocketAddress bindAddress, private GrpcServerBuilder createServerBuilder(String hostName, SocketAddress bindAddress, ChannelType type) { // Create an executor for Worker RPC server. + final Counter clientCounter = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS; mRPCExecutor = ExecutorServiceBuilder.buildExecutorService( - ExecutorServiceBuilder.RpcExecutorHost.WORKER); + ExecutorServiceBuilder.RpcExecutorHost.WORKER, clientCounter); MetricsSystem.registerGaugeIfAbsent(MetricKey.WORKER_RPC_QUEUE_LENGTH.getName(), mRPCExecutor::getRpcQueueLength); MetricsSystem.registerGaugeIfAbsent(MetricKey.WORKER_RPC_THREAD_ACTIVE_COUNT.getName(), diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java index 83c5ac90c941..d8f6fa1c6b00 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/GrpcExecutors.java @@ -20,10 +20,17 @@ import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.util.ThreadFactoryUtils; import alluxio.util.executor.UniqueBlockingQueue; +import alluxio.worker.block.DefaultBlockWorker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; 
import java.util.List; import java.util.concurrent.AbstractExecutorService; +import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -35,6 +42,7 @@ */ @ThreadSafe public final class GrpcExecutors { + private static final Logger LOG = LoggerFactory.getLogger(GrpcExecutors.class); private static final long THREAD_STOP_MS = Constants.SECOND_MS * 10; private static final int THREADS_MIN = 4; @@ -44,17 +52,23 @@ public final class GrpcExecutors { THREAD_STOP_MS, TimeUnit.MILLISECONDS, new UniqueBlockingQueue<>( Configuration.getInt(PropertyKey.WORKER_NETWORK_ASYNC_CACHE_MANAGER_QUEUE_MAX)), ThreadFactoryUtils.build("CacheManagerExecutor-%d", true)); + // Async caching is an optimization internal to Alluxio, which can be aborted any time public static final ExecutorService CACHE_MANAGER_EXECUTOR = - new ImpersonateThreadPoolExecutor(CACHE_MANAGER_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(CACHE_MANAGER_THREAD_POOL_EXECUTOR, false); + // Used by BlockWorkerClientServiceHandler.readBlock() by DataReader threads, + // where each DataReader reads a block content for reply. + // The thread pool queue is always empty. private static final ThreadPoolExecutor BLOCK_READER_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt( PropertyKey.WORKER_NETWORK_BLOCK_READER_THREADS_MAX), THREAD_STOP_MS, TimeUnit.MILLISECONDS, new SynchronousQueue<>(), ThreadFactoryUtils.build("BlockDataReaderExecutor-%d", true)); public static final ExecutorService BLOCK_READER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_READER_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_READER_THREAD_POOL_EXECUTOR, true); + // Used for replying data to the client in BlockReadHandler. + // The thread pool has a small queue of a constant size. 
private static final ThreadPoolExecutor BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt(PropertyKey.WORKER_NETWORK_BLOCK_READER_THREADS_MAX), @@ -62,15 +76,16 @@ public final class GrpcExecutors { ThreadFactoryUtils.build("BlockDataReaderSerializedExecutor-%d", true), new ThreadPoolExecutor.CallerRunsPolicy()); public static final ExecutorService BLOCK_READER_SERIALIZED_RUNNER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_SERIALIZED_THREAD_POOL_EXECUTOR, true); + // Used for writing blocks. The queue is always empty. private static final ThreadPoolExecutor BLOCK_WRITE_THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(THREADS_MIN, Configuration.getInt( PropertyKey.WORKER_NETWORK_BLOCK_WRITER_THREADS_MAX), THREAD_STOP_MS, TimeUnit.MILLISECONDS, new SynchronousQueue<>(), ThreadFactoryUtils.build("BlockDataWriterExecutor-%d", true)); public static final ExecutorService BLOCK_WRITER_EXECUTOR = - new ImpersonateThreadPoolExecutor(BLOCK_WRITE_THREAD_POOL_EXECUTOR); + new ImpersonateThreadPoolExecutor(BLOCK_WRITE_THREAD_POOL_EXECUTOR, true); static { MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( @@ -144,9 +159,11 @@ private GrpcExecutors() {} * */ private static class ImpersonateThreadPoolExecutor extends AbstractExecutorService { private final ExecutorService mDelegate; + private final boolean mTracked; - public ImpersonateThreadPoolExecutor(ExecutorService service) { + public ImpersonateThreadPoolExecutor(ExecutorService service, boolean tracked) { mDelegate = service; + mTracked = tracked; } @Override @@ -154,22 +171,128 @@ public void execute(final Runnable command) { // If there's no impersonation, proxyUser is just null User proxyUser = AuthenticatedClientUser.getOrNull(); mDelegate.execute(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } try { +// SleepUtils.sleepMs(1000); 
AuthenticatedClientUser.set(proxyUser); command.run(); } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } AuthenticatedClientUser.remove(); } }); } + @Override + public Future submit(Callable task) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { +// SleepUtils.sleepMs(1000); + AuthenticatedClientUser.set(proxyUser); + return task.call(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }); + } + + @Override + public Future submit(Runnable task, T result) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { +// SleepUtils.sleepMs(1000); + AuthenticatedClientUser.set(proxyUser); + task.run(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }, result); + } + + @Override + public Future submit(Runnable task) { + // If there's no impersonation, proxyUser is just null + User proxyUser = AuthenticatedClientUser.getOrNull(); + return mDelegate.submit(() -> { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.inc(); + } + try { +// SleepUtils.sleepMs(1000); + AuthenticatedClientUser.set(proxyUser); + task.run(); + } finally { + if (mTracked) { + DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.dec(); + } + AuthenticatedClientUser.remove(); + } + }); + } + + @Override + public List> invokeAll(Collection> tasks) + throws InterruptedException { + // Not used. Also the active counter is hard, so we do not support it. 
+ throw new UnsupportedOperationException("invokeAll(Collection) is not supported"); + } + + @Override + public List> invokeAll(Collection> tasks, long timeout, + TimeUnit unit) throws InterruptedException { + // Not used. Also the active counter is hard, so we do not support it. + throw new UnsupportedOperationException( + "invokeAll(Collection,long,TimeUnit) is not supported"); + } + + @Override + public T invokeAny(Collection> tasks) { + // Not used. Also the active counter is hard, so we do not support it. + throw new UnsupportedOperationException("invokeAny(Callable) is not supported"); + } + + @Override + public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) { + // Not used. Also the active counter is hard, so we do not support it. + throw new UnsupportedOperationException( + "invokeAny(Callable,long,TimeUnit) is not supported"); + } + @Override public void shutdown() { + long operationCount = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.getCount(); + if (operationCount > 0) { + LOG.warn("{} operations have not completed at shutdown()", operationCount); + } mDelegate.shutdown(); } @Override public List shutdownNow() { + long operationCount = DefaultBlockWorker.Metrics.WORKER_ACTIVE_OPERATIONS.getCount(); + if (operationCount > 0) { + LOG.warn("{} operations have not completed at shutdownNow()", operationCount); + } return mDelegate.shutdownNow(); } diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index f7d89ebb8bdb..f6aed214f010 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -109,10 +109,11 @@ message WorkerLostStorageInfo { /** a map from tier alias to the lost storage paths */ map lostStorage = 2; } -message RemoveDecommissionedWorkerPOptions { - optional string workerName = 1; +message RemoveDisabledWorkerPOptions { + required string workerHostname = 1; + optional int64 
workerWebPort = 2; } -message RemoveDecommissionedWorkerPResponse {} +message RemoveDisabledWorkerPResponse {} message GetWorkerLostStoragePOptions {} message GetWorkerLostStoragePResponse { @@ -121,7 +122,9 @@ message GetWorkerLostStoragePResponse { message DecommissionWorkerPResponse {} message DecommissionWorkerPOptions { - required string workerName = 1; + required string workerHostname = 1; + optional int64 workerWebPort = 2; + optional bool canRegisterAgain = 3; } /** @@ -158,8 +161,8 @@ service BlockMasterClientService { * If target worker is in the decommissioned worker set, * return true, remove target worker from decommissioned worker set; else, return false. */ - rpc RemoveDecommissionedWorker(RemoveDecommissionedWorkerPOptions) - returns (RemoveDecommissionedWorkerPResponse); + rpc RemoveDisabledWorker(RemoveDisabledWorkerPOptions) + returns (RemoveDisabledWorkerPResponse); /** * Returns a list of workers information for report CLI. diff --git a/core/transport/src/main/proto/grpc/common.proto b/core/transport/src/main/proto/grpc/common.proto index 67dbd85fd3be..e89c4c869738 100644 --- a/core/transport/src/main/proto/grpc/common.proto +++ b/core/transport/src/main/proto/grpc/common.proto @@ -80,6 +80,8 @@ enum CommandType { Free = 3; // Ask the worker to free files. Delete = 4; // Ask the worker to delete files. 
Persist = 5; // Ask the worker to persist a file for lineage + Decommissioned = 6; // Notify the worker that it has been decommissioned + Disabled = 7; // Notify the worker that it has been disabled } message ConfigProperty { diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index d169882e78be..f41422aa1781 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -365,17 +365,22 @@ ] }, { - "name": "RemoveDecommissionedWorkerPOptions", + "name": "RemoveDisabledWorkerPOptions", "fields": [ { "id": 1, - "name": "workerName", + "name": "workerHostname", "type": "string" + }, + { + "id": 2, + "name": "workerWebPort", + "type": "int64" } ] }, { - "name": "RemoveDecommissionedWorkerPResponse" + "name": "RemoveDisabledWorkerPResponse" }, { "name": "GetWorkerLostStoragePOptions" @@ -399,8 +404,18 @@ "fields": [ { "id": 1, - "name": "workerName", + "name": "workerHostname", "type": "string" + }, + { + "id": 2, + "name": "workerWebPort", + "type": "int64" + }, + { + "id": 3, + "name": "canRegisterAgain", + "type": "bool" } ] }, @@ -816,9 +831,9 @@ "out_type": "GetWorkerInfoListPResponse" }, { - "name": "RemoveDecommissionedWorker", - "in_type": "RemoveDecommissionedWorkerPOptions", - "out_type": "RemoveDecommissionedWorkerPResponse" + "name": "RemoveDisabledWorker", + "in_type": "RemoveDisabledWorkerPOptions", + "out_type": "RemoveDisabledWorkerPResponse" }, { "name": "GetWorkerReport", @@ -1596,6 +1611,14 @@ { "name": "Persist", "integer": 5 + }, + { + "name": "Decommissioned", + "integer": 6 + }, + { + "name": "Disabled", + "integer": 7 } ] }, diff --git a/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java b/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java deleted file mode 100644 index 09fa2419e97e..000000000000 --- a/shell/src/main/java/alluxio/cli/fs/command/DecommissionWorkerCommand.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * 
The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License"). You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package alluxio.cli.fs.command; - -import alluxio.Constants; -import alluxio.client.block.BlockMasterClient; -import alluxio.client.block.BlockWorkerInfo; -import alluxio.client.file.FileSystemContext; -import alluxio.exception.AlluxioException; -import alluxio.grpc.DecommissionWorkerPOptions; -import alluxio.resource.CloseableResource; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.Options; - -import java.io.IOException; -import java.util.List; -import java.util.Objects; - -/** - * Decommission a specific worker, the decommissioned worker is not automatically - * shutdown and are not chosen for writing new replicas. - */ -public final class DecommissionWorkerCommand extends AbstractFileSystemCommand { - - private static final int DEFAULT_TIMEOUT = 10 * Constants.MINUTE_MS; - - private static final Option HOST_OPTION = - Option.builder("h") - .longOpt("host") - .required(true) // Host option is mandatory. - .hasArg(true) - .numberOfArgs(1) - .argName("host") - .desc("A worker host name, which is mandatory.") - .build(); - - /** - * Constructs a new instance to decommission the given worker from Alluxio. 
- * @param fsContext the filesystem of Alluxio - */ - public DecommissionWorkerCommand(FileSystemContext fsContext) { - super(fsContext); - } - - @Override - public int run(CommandLine cl) throws AlluxioException, IOException { - String workerHost = cl.getOptionValue(HOST_OPTION.getLongOpt()); - - DecommissionWorkerPOptions options = - DecommissionWorkerPOptions.newBuilder() - .setWorkerName(workerHost).build(); - - List cachedWorkers = mFsContext.getCachedWorkers(); - - for (BlockWorkerInfo blockWorkerInfo : cachedWorkers) { - if (Objects.equals(blockWorkerInfo.getNetAddress().getHost(), workerHost)) { - try (CloseableResource blockMasterClient = - mFsContext.acquireBlockMasterClientResource()) { - long start = System.currentTimeMillis(); - blockMasterClient.get().decommissionWorker(options); - long duration = System.currentTimeMillis() - start; - System.out.printf("Decommission worker %s success, spend: %dms%n", - workerHost, duration); - } catch (IOException ie) { - throw new AlluxioException(ie.getMessage()); - } - return 0; - } - } - - System.out.println("Target worker is not found in Alluxio, please input another hostname.\n" - + "Available workers:"); - for (BlockWorkerInfo blockWorkerInfo : cachedWorkers) { - System.out.println("\t" + blockWorkerInfo.getNetAddress().getHost() - + ":" + blockWorkerInfo.getNetAddress().getRpcPort()); - } - return 0; - } - - @Override - public String getCommandName() { - return "decommissionWorker"; - } - - @Override - public Options getOptions() { - return new Options().addOption(HOST_OPTION); - } - - @Override - public String getUsage() { - return "decommissionWorker --h "; - } - - @Override - public String getDescription() { - return "Decommission a specific worker in the Alluxio cluster. The decommissioned" - + "worker is not shut down but will not accept new read/write operations. 
The ongoing " - + "operations will proceed until completion."; - } -} diff --git a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java index 0b731bf98865..e072516b0a91 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/FreeWorkerCommand.java @@ -19,7 +19,6 @@ import alluxio.client.block.stream.BlockWorkerClient; import alluxio.client.file.FileSystemContext; import alluxio.exception.AlluxioException; -import alluxio.exception.status.NotFoundException; import alluxio.resource.CloseableResource; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -81,16 +80,7 @@ public int run(CommandLine cl) throws AlluxioException, IOException { return -1; } - // 3. Remove target worker metadata. - try (CloseableResource blockMasterClient = - mFsContext.acquireBlockMasterClientResource()) { - blockMasterClient.get().removeDecommissionedWorker(workerName); - } catch (NotFoundException notFoundException) { - System.out.println("Worker " + workerName + " is not found in decommissioned worker set."); - return -1; - } - - // 4. Free target worker. + // 3. Free target worker. try (CloseableResource blockWorkerClient = mFsContext.acquireBlockWorkerClient(targetWorkerNetAddress)) { blockWorkerClient.get().freeWorker(); diff --git a/shell/src/main/java/alluxio/cli/fsadmin/command/DecommissionWorkerCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/command/DecommissionWorkerCommand.java new file mode 100644 index 000000000000..45c1cc8458e6 --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fsadmin/command/DecommissionWorkerCommand.java @@ -0,0 +1,546 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.cli.fsadmin.command; + +import alluxio.Constants; +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.file.FileSystemContext; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.metrics.MetricKey; +import alluxio.retry.RetryPolicy; +import alluxio.retry.TimeoutRetry; +import alluxio.util.FormatUtils; +import alluxio.util.SleepUtils; +import alluxio.util.network.HttpUtils; +import alluxio.wire.WorkerNetAddress; +import alluxio.wire.WorkerWebUIOperations; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.http.client.utils.URIBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.LongAdder; +import java.util.stream.Collectors; + +/** + * Decommission a specific batch of workers, the decommissioned worker is not automatically + * shutdown and will not be picked for new I/O requests. The workers still serve concurrent + * requests and eventually will become idle. This command waits for the workers to be idle. 
+ * So when this command returns, it will be safe for the admin to kill/restart those workers. + * See the help message for more details. + */ +public final class DecommissionWorkerCommand extends AbstractFsAdminCommand { + private static final Logger LOG = LoggerFactory.getLogger(DecommissionWorkerCommand.class); + private static final int DEFAULT_WAIT_TIME_MS = 5 * Constants.MINUTE_MS; // 5min + + private static final Option ADDRESSES_OPTION = + Option.builder("a") + .longOpt("addresses") + .required(true) // Host option is mandatory. + .hasArg(true) + .numberOfArgs(1) + .argName("workerHosts") + .desc("One or more worker addresses separated by comma. If port is not specified, " + + PropertyKey.WORKER_WEB_PORT.getName() + " will be used. The command will talk " + + "to the workers' web port to monitor if they are idle and safe to stop.") + .build(); + private static final Option WAIT_OPTION = + Option.builder("w") + .longOpt("wait") + .required(false) + .hasArg(true) + .numberOfArgs(1) + .argName("waitTime") + .desc("Time to wait, in human readable form like 5m.") + .build(); + private static final Option DISABLE_OPTION = + Option.builder("d") + .longOpt("disable") + .required(false) + .hasArg(false) + .desc("Whether the worker should be disabled and not allowed to register again, " + + "until it is re-enabled again by fsadmin enableWorker command.") + .build(); + + private final Set mFailedWorkers = new HashSet<>(); + private final Map mWaitingWorkers = new HashMap<>(); + private final Set mFinishedWorkers = new HashSet<>(); + private final Set mLostWorkers = new HashSet<>(); + private final AlluxioConfiguration mConf; + + /** + * Constructs a new instance to decommission a given batch of workers from Alluxio. 
+ * + * @param context fsadmin command context + * @param alluxioConf Alluxio configuration + */ + public DecommissionWorkerCommand(Context context, AlluxioConfiguration alluxioConf) { + super(context); + mConf = alluxioConf; + } + + private BlockWorkerInfo findMatchingWorkerAddress( + WorkerNetAddress address, List cachedWorkers) { + for (BlockWorkerInfo worker : cachedWorkers) { + if (worker.getNetAddress().getHost().equals(address.getHost())) { + return worker; + } + } + throw new IllegalArgumentException("Worker " + address.getHost() + + " is not known by the master. Please check the hostname or retry later. " + + "Available workers are: " + printCachedWorkerAddresses(cachedWorkers)); + } + + private String printCachedWorkerAddresses(List cachedWorkers) { + StringBuilder sb = new StringBuilder(); + for (BlockWorkerInfo blockWorkerInfo : cachedWorkers) { + sb.append("\t").append(blockWorkerInfo.getNetAddress().getHost()).append(":") + .append(blockWorkerInfo.getNetAddress().getWebPort()); + } + return sb.toString(); + } + + private long parseWaitTimeMs(CommandLine cl) { + if (cl.hasOption(WAIT_OPTION.getLongOpt())) { + String waitTimeStr = cl.getOptionValue(WAIT_OPTION.getLongOpt()); + return FormatUtils.parseTimeSize(waitTimeStr); + } else { + return DEFAULT_WAIT_TIME_MS; + } + } + + @Override + public int run(CommandLine cl) { + long waitTimeMs = parseWaitTimeMs(cl); + FileSystemContext context = FileSystemContext.create(); + List availableWorkers; + try { + availableWorkers = context.getCachedWorkers(); + } catch (Exception e) { + System.err.format("Cannot get available worker list from master: %s%n", e.getMessage()); + LOG.error("Failed to get worker list from master", e); + return ReturnCode.LOST_MASTER_CONNECTION.getCode(); + } + + // The decommission command is idempotent + sendDecommissionCommand(cl, availableWorkers); + System.out.format("Sent decommission messages to the master, %s failed and %s succeeded%n", + mFailedWorkers.size(), 
mWaitingWorkers.size()); + System.out.format("Failed ones: %s%n", mFailedWorkers.stream() + .map(WorkerAddressUtils::convertAddressToStringWebPort).collect(Collectors.toList())); + if (mWaitingWorkers.size() == 0) { + System.out.println(ReturnCode.DECOMMISSION_FAILED.getMessage()); + return ReturnCode.DECOMMISSION_FAILED.getCode(); + } + + // Manually block and wait, for all clients(proxies) to see the update on the worker list + verifyFromMasterAndWait(context, mWaitingWorkers.keySet()); + + // Block and wait for the workers to become idle, so when this command returns without error, + // the admin is safe to proceed to stopping those workers + Instant startWaiting = Instant.now(); + waitForWorkerToBecomeIdle(startWaiting, waitTimeMs); + Instant end = Instant.now(); + System.out.format("Waited %s minutes for workers to be idle%n", + Duration.between(startWaiting, end).toMinutes()); + + if (mWaitingWorkers.size() > 0 || mLostWorkers.size() > 0) { + if (mWaitingWorkers.size() > 0) { + System.out.format("%s workers still have not finished all their operations%n", + mWaitingWorkers.keySet()); + System.out.println("The admin should manually intervene and check those workers, " + + "before shutting them down."); + for (Map.Entry entry : mWaitingWorkers.entrySet()) { + WorkerWebUIOperations lastSeenStatus = entry.getValue().getWorkerTrackedStatus(); + System.out.format("Worker %s has %s=%s, %s=%s%n", + WorkerAddressUtils.convertAddressToStringWebPort(entry.getKey()), + MetricKey.WORKER_ACTIVE_OPERATIONS.getName(), + lastSeenStatus.getOperationCount(), + MetricKey.WORKER_RPC_QUEUE_LENGTH.getName(), + lastSeenStatus.getRpcQueueLength()); + } + } + if (mLostWorkers.size() > 0) { + System.out.format("%s workers finished all their operations successfully:%n%s%n", + mFinishedWorkers.size(), + WorkerAddressUtils.workerAddressListToString(mFinishedWorkers)); + System.out.format("%s workers became inaccessible and we assume there are no operations, " + + "but we still recommend 
the admin to double check:%n%s%n", + mLostWorkers.size(), + WorkerAddressUtils.workerAddressListToString(mLostWorkers)); + } + return mWaitingWorkers.size() > 0 ? ReturnCode.WORKERS_NOT_IDLE.getCode() + : ReturnCode.LOST_SOME_WORKERS.getCode(); + } else { + System.out.println(ReturnCode.OK.getMessage()); + return ReturnCode.OK.getCode(); + } + } + + private void sendDecommissionCommand(CommandLine cl, List availableWorkers) { + boolean canRegisterAgain = !cl.hasOption(DISABLE_OPTION.getLongOpt()); + String workerAddressesStr = cl.getOptionValue(ADDRESSES_OPTION.getLongOpt()); + if (workerAddressesStr.isEmpty()) { + throw new IllegalArgumentException("Worker addresses must be specified"); + } + List addresses = + WorkerAddressUtils.parseWorkerAddresses(workerAddressesStr, mConf); + for (WorkerNetAddress a : addresses) { + System.out.format("Decommissioning worker %s%n", + WorkerAddressUtils.convertAddressToStringWebPort(a)); + + BlockWorkerInfo worker = findMatchingWorkerAddress(a, availableWorkers); + WorkerNetAddress workerAddress = worker.getNetAddress(); + DecommissionWorkerPOptions options = + DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerAddress.getHost()) + .setWorkerWebPort(workerAddress.getWebPort()) + .setCanRegisterAgain(canRegisterAgain).build(); + try { + mBlockClient.decommissionWorker(options); + System.out.format("Set worker %s decommissioned on master%n", + WorkerAddressUtils.convertAddressToStringWebPort(workerAddress)); + // Start counting for this worker + mWaitingWorkers.put(worker.getNetAddress(), new WorkerStatus()); + } catch (IOException ie) { + System.err.format("Failed to decommission worker %s%n", + WorkerAddressUtils.convertAddressToStringWebPort(workerAddress)); + ie.printStackTrace(); + mFailedWorkers.add(workerAddress); + } + } + } + + private void waitForWorkerToBecomeIdle(Instant startWaiting, long waitTimeMs) { + // Block and wait for the workers to become idle, so when this command returns without error, + // 
the admin is safe to proceed to stopping those workers + boolean helpPrinted = false; + // Sleep 1s until the target time + RetryPolicy retry = new TimeoutRetry(startWaiting.toEpochMilli() + waitTimeMs, 1000); + while (mWaitingWorkers.size() > 0 && retry.attempt()) { + // Poll the status from each worker + for (Map.Entry entry : mWaitingWorkers.entrySet()) { + WorkerNetAddress address = entry.getKey(); + System.out.format("Polling status from worker %s%n", + WorkerAddressUtils.convertAddressToStringWebPort(address)); + try { + WorkerWebUIOperations workerStatus = pollWorkerStatus(address); + entry.getValue().recordWorkerStatus(workerStatus); + if (canWorkerBeStopped(workerStatus)) { + entry.getValue().countWorkerIsQuiet(); + } else { + /* + * If there are operations on the worker, clear the counter. + * The worker is considered idle only if there are zero operations in + * consecutive checks. + */ + entry.getValue().countWorkerNotQuiet(); + } + } catch (Exception e) { + System.err.format("Failed to poll progress from worker %s: %s%n", + address.getHost(), e.getMessage()); + if (!helpPrinted) { + printWorkerNoResponseReasons(); + helpPrinted = true; + } + LOG.error("Failed to poll progress from worker", e); + entry.getValue().countError(); + } + } + + mWaitingWorkers.entrySet().removeIf(entry -> { + boolean isQuiet = entry.getValue().isWorkerQuiet(); + if (isQuiet) { + System.out.format("There is no operation on worker %s:%s for %s times in a row. " + + "Worker is considered safe to stop.%n", entry.getKey().getHost(), + entry.getKey().getWebPort(), WorkerStatus.WORKER_QUIET_THRESHOLD); + mFinishedWorkers.add(entry.getKey()); + return true; + } + boolean isError = entry.getValue().isWorkerInaccessible(); + if (isError) { + System.out.format("Failed to poll status from worker %s:%s for %s times in a row. " + + "Worker is considered inaccessible and not functioning. 
" + + "If the worker is not functioning, it probably does not currently have ongoing " + "I/O and can be stopped. But the admin is advised to manually double check the " + "worker before stopping it. %n", + entry.getKey().getHost(), entry.getKey().getWebPort(), + WorkerStatus.WORKER_ERROR_THRESHOLD); + mLostWorkers.add(entry.getKey()); + return true; + } + return false; + }); + } + } + + private static void printWorkerNoResponseReasons() { + System.err.println("There are many reasons why the poll can fail, including but not limited " + + "to:"); + System.err.println("1. Worker is running with a low version which does not contain " + + "this endpoint"); + System.err.println("2. alluxio.worker.web.port is not configured correctly or is not " + + "accessible by firewall rules"); + System.err.println("3. Some other transient network errors"); + } + + // We verify the target workers have been taken off the list on the master + // Then we manually block for a while so clients/proxies in the cluster all get the update + private void verifyFromMasterAndWait( + FileSystemContext context, Collection removedWorkers) { + // Wait a while so the proxy instances will get updated worker list from master + long workerListLag = mConf.getMs(PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL); + System.out.format("Clients take %s=%s to be updated on the new worker list so this command " + + "will block for the same amount of time to ensure the update propagates to clients " + + "in the cluster.%n", + PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL.getName(), + mConf.get(PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL)); + SleepUtils.sleepMs(workerListLag); + + // Poll the latest worker list and verify the workers are decommissioned + System.out.println("Verifying the decommission has taken effect by listing all " + + "available workers on the master"); + try { + Set cachedWorkers = new HashSet<>(context.getCachedWorkers()); + System.out.println("Now on master the available workers are: " 
+ + WorkerAddressUtils.workerListToString(cachedWorkers)); + cachedWorkers.forEach(w -> { + if (removedWorkers.contains(w.getNetAddress())) { + System.err.format("Worker %s is still showing available on the master. " + + "Please check why the decommission did not work.%n", w.getNetAddress()); + System.err.println("This command will still continue, but the admin should manually " + + "verify the state of this worker afterwards."); + } + }); + } catch (IOException e) { + System.err.format("Failed to refresh the available worker list from master: %s%n", + e.getMessage()); + System.err.println("The command will skip this check and continue. If we observe " + + "the workers become idle, that suggests the decommission is successful and no clients " + + "will be using this batch of workers, and this error can be ignored."); + LOG.error("Failed to refresh the available worker list from master", e); + } + } + + @VisibleForTesting + private static WorkerWebUIOperations pollWorkerStatus(WorkerNetAddress worker) + throws IOException { + URIBuilder uriBuilder = new URIBuilder(); + uriBuilder.setScheme("http"); + uriBuilder.setHost(worker.getHost()); + uriBuilder.setPort(worker.getWebPort()); + uriBuilder.setPath(Constants.REST_API_PREFIX + "/worker/operations"); + + // Poll the worker status endpoint + AtomicReference workerState = new AtomicReference<>(); + HttpUtils.get(uriBuilder.toString(), 5000, inputStream -> { + ObjectMapper mapper = new ObjectMapper(); + workerState.set(mapper.readValue(inputStream, WorkerWebUIOperations.class)); + }); + + if (workerState.get() == null) { + // Should not reach here + throw new IOException("Received null from worker operation status!"); + } + return workerState.get(); + } + + private static boolean canWorkerBeStopped(WorkerWebUIOperations workerStatus) { + // Now the idleness check only considers RPCs. This means it does NOT consider + // short circuit r/w operations. 
So when the admin believes the worker is idle and + // kill/restart the worker, ongoing r/w operations may fail. + // https://github.com/Alluxio/alluxio/issues/17343 + /* + * The operation count consists of ongoing operations in worker thread pools: + * 1. RPC pool + * 2. Data reader pool (used for reading block contents) + * 3. Data reader serialized pool (used for replying read requests) + * 4. Data writer pool + * So if the operation count goes to zero that means all pools are idle. + * + * Pool 2, 3 and 4 all have a very small queue so only the queue 1 length is helpful. + */ + boolean result = workerStatus.getOperationCount() == 0 && workerStatus.getRpcQueueLength() == 0; + if (!result) { + System.out.format("Worker ActiveOperations=%s, RpcQueueLength=%s%n", + workerStatus.getOperationCount(), workerStatus.getRpcQueueLength()); + } + return result; + } + + @Override + public String getCommandName() { + return "decommissionWorker"; + } + + @Override + public Options getOptions() { + return new Options().addOption(ADDRESSES_OPTION) + .addOption(WAIT_OPTION).addOption(DISABLE_OPTION); + } + + @Override + public String getUsage() { + return "decommissionWorker --addresses [--wait waitTime] [--disable]"; + } + + @Override + public String getDescription() { + return "Decommission a specific batch of workers in the Alluxio cluster. " + + "The command will perform the following actions:\n" + + "1. For each worker in the batch, send a decommission command to the primary Alluxio " + + "master so the master marks those workers as decommissioned and will not serve " + + "operations.\n" + + "2. It takes a small interval for all other Alluxio components (like clients and " + + "Proxy instances) to know those workers should not be used, so this command waits for " + + "the interval time defined by " + PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL + ".\n" + + "3. 
Gets the active worker list from the master after waiting, and verify the target " + "workers are not active anymore.\n" + "4. Wait for the workers to become idle. This command will constantly check the " + "idleness status on each worker.\n" + "5. Either all workers have become idle, or the specified timeout expires, this command " + "will return.\n" + "\n" // One empty line + "This command is idempotent and can be retried, but the admin is advised to manually " + "check if there's an error. The return codes have different meanings: " + printReturnCodes(); } + + private String printReturnCodes() { + StringBuilder sb = new StringBuilder(); + for (ReturnCode rc : ReturnCode.values()) { + sb.append("\n").append(rc.getCode()).append(": ").append(rc.name()); + sb.append("\n").append(rc.getMessage()); + } + return sb.toString(); + } + + /** + * A set of return codes. + * Each code embeds an exit code (like 0 or 1) and a message for the admin. + */ + public enum ReturnCode { + OK(0, "All workers are successfully decommissioned and now idle. Safe to kill or " + + "restart this batch of workers now."), + DECOMMISSION_FAILED(1, "Failed to decommission all workers. " + + "The admin should double check the worker addresses and the primary master status."), + LOST_MASTER_CONNECTION(2, "Lost connection to the primary master while this " + + "command is running. This suggests the configured master address is wrong or the " + + "primary master failed over."), + // Some workers are still not idle so they are not safe to restart. + WORKERS_NOT_IDLE(3, "Some workers were still not idle after the wait. " + + "Either the wait time is too short or those workers failed to mark decommissioned. " + + "The admin should manually intervene and check those workers."), + LOST_SOME_WORKERS(10, "Workers are decommissioned but some or all workers " + + "lost contact while this command is running. If a worker is not serving then it is " + + "safe to kill or restart. 
But the admin is advised to double check the status of " + + "those workers.") + ; + + private final int mCode; + private final String mMessage; + + /** + * Constructor. + * + * @param code the code to exit with + * @param message the message to display + */ + ReturnCode(int code, String message) { + mCode = code; + mMessage = message; + } + + /** + * Gets the code. + * @return the code + */ + public int getCode() { + return mCode; + } + + /** + * Gets the message. + * @return the message + */ + public String getMessage() { + return mMessage; + } + } + + /** + * A wrapper managing worker activeness status and deciding whether the worker + * can be safely killed. + */ + public static class WorkerStatus { + public static final int WORKER_QUIET_THRESHOLD = 20; + public static final int WORKER_ERROR_THRESHOLD = 5; + + private final LongAdder mConsecutiveQuietCount; + private final LongAdder mConsecutiveFailureCount; + private final AtomicReference mWorkerStatus; + + WorkerStatus() { + mConsecutiveQuietCount = new LongAdder(); + mConsecutiveFailureCount = new LongAdder(); + mWorkerStatus = new AtomicReference<>(null); + } + + void countWorkerIsQuiet() { + mConsecutiveFailureCount.reset(); + mConsecutiveQuietCount.increment(); + } + + void countWorkerNotQuiet() { + mConsecutiveFailureCount.reset(); + mConsecutiveQuietCount.reset(); + } + + void countError() { + mConsecutiveQuietCount.reset(); + mConsecutiveFailureCount.increment(); + } + + boolean isWorkerQuiet() { + return mConsecutiveQuietCount.sum() >= WORKER_QUIET_THRESHOLD; + } + + boolean isWorkerInaccessible() { + return mConsecutiveFailureCount.sum() >= WORKER_ERROR_THRESHOLD; + } + + void recordWorkerStatus(WorkerWebUIOperations status) { + mWorkerStatus.set(status); + } + + WorkerWebUIOperations getWorkerTrackedStatus() { + return mWorkerStatus.get(); + } + } +} diff --git a/shell/src/main/java/alluxio/cli/fsadmin/command/EnableWorkerCommand.java 
b/shell/src/main/java/alluxio/cli/fsadmin/command/EnableWorkerCommand.java new file mode 100644 index 000000000000..8accfe9729ed --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fsadmin/command/EnableWorkerCommand.java @@ -0,0 +1,131 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.cli.fsadmin.command; + +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AlluxioException; +import alluxio.grpc.RemoveDisabledWorkerPOptions; +import alluxio.wire.WorkerNetAddress; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * The DecommissionWorkerCommand can specify to disable certain workers in the cluster, so + * they are not allowed to register again. This command is the reverse operation of that, which + * enables those workers to register to the cluster. + * See the help message for more details. + */ +public class EnableWorkerCommand extends AbstractFsAdminCommand { + private static final Option ADDRESSES_OPTION = + Option.builder("a") + .longOpt("addresses") + .required(true) // Host option is mandatory. + .hasArg(true) + .numberOfArgs(1) + .argName("workerHosts") + .desc("One or more worker addresses separated by comma. If port is not specified, " + + PropertyKey.WORKER_WEB_PORT.getName() + " will be used. 
" + + "Note the addresses specify the WEB port instead of RPC port!") + .build(); + + private final AlluxioConfiguration mConf; + + /** + * @param context fsadmin command context + * @param alluxioConf Alluxio configuration + */ + public EnableWorkerCommand(Context context, AlluxioConfiguration alluxioConf) { + super(context); + mConf = alluxioConf; + } + + @Override + public int run(CommandLine cl) throws AlluxioException, IOException { + String workerAddressesStr = cl.getOptionValue(ADDRESSES_OPTION.getLongOpt()); + if (workerAddressesStr.isEmpty()) { + throw new IllegalArgumentException("Worker addresses must be specified"); + } + List addresses = WorkerAddressUtils.parseWorkerAddresses( + workerAddressesStr, mConf); + + Set failedWorkers = new HashSet<>(); + for (WorkerNetAddress workerAddress : addresses) { + System.out.format("Re-enabling worker %s%n", + WorkerAddressUtils.convertAddressToStringWebPort(workerAddress)); + try { + RemoveDisabledWorkerPOptions options = + RemoveDisabledWorkerPOptions.newBuilder() + .setWorkerHostname(workerAddress.getHost()) + .setWorkerWebPort(workerAddress.getWebPort()).build(); + mBlockClient.removeDisabledWorker(options); + System.out.format("Re-enabled worker %s on master%n", + WorkerAddressUtils.convertAddressToStringWebPort(workerAddress)); + } catch (IOException ie) { + System.err.format("Failed to re-enable worker %s%n", + WorkerAddressUtils.convertAddressToStringWebPort(workerAddress)); + ie.printStackTrace(); + failedWorkers.add(workerAddress); + } + } + + if (failedWorkers.size() == 0) { + System.out.println("Successfully re-enabled all workers on the master. " + + "The workers should be able to register to the master and then serve normally." 
+ + " Note there is a short gap defined by " + + PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL.getName() + + " before the clients become aware of this worker and start to use it."); + return 0; + } else { + System.out.format("%s failed to be re-enabled on the master: %s%n", failedWorkers.size(), + failedWorkers.stream().map(WorkerAddressUtils::convertAddressToStringWebPort) + .collect(Collectors.toList())); + System.out.println("The admin needs to manually check and fix the problem. " + + "Those workers are not able to register to the master and serve requests."); + return 1; + } + } + + @Override + public String getCommandName() { + return "enableWorker"; + } + + @Override + public Options getOptions() { + return new Options().addOption(ADDRESSES_OPTION); + } + + @Override + public String getUsage() { + return "enableWorker --addresses "; + } + + @Override + public String getDescription() { + return "Re-enables workers to register and join the cluster. This is used in pair with the " + + "decommissionWorker command. For example:\n\n" + + "# -d specifies the worker should be rejected from registering\n" + + "$bin/alluxio fsadmin decommissionWorker --addresses worker1 -d\n" + + "# This can be reversed by enableWorker command\n" + + "$bin/alluxio fsadmin enableWorker --addresses worker1\n" + + "# worker1 should now be able to register"; + } +} diff --git a/shell/src/main/java/alluxio/cli/fsadmin/command/WorkerAddressUtils.java b/shell/src/main/java/alluxio/cli/fsadmin/command/WorkerAddressUtils.java new file mode 100644 index 000000000000..01a5e8397ea7 --- /dev/null +++ b/shell/src/main/java/alluxio/cli/fsadmin/command/WorkerAddressUtils.java @@ -0,0 +1,95 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.cli.fsadmin.command; + +import alluxio.client.block.BlockWorkerInfo; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.wire.WorkerNetAddress; + +import com.google.common.base.Preconditions; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * A util class for worker address -> string conversion. + */ +public class WorkerAddressUtils { + /** + * Parses a string to worker addresses. + * + * @param workerAddressesStr the string input + * @param alluxioConf the conf to rely on + * @return a list of worker addresses + */ + public static List parseWorkerAddresses( + String workerAddressesStr, AlluxioConfiguration alluxioConf) { + List result = new ArrayList<>(); + for (String part : workerAddressesStr.split(",")) { + if (part.contains(":")) { + String[] p = part.split(":"); + Preconditions.checkState(p.length == 2, + "worker address %s cannot be recognized", part); + String port = p[1]; + WorkerNetAddress addr = new WorkerNetAddress() + .setHost(p[0]).setWebPort(Integer.parseInt(port)); + result.add(addr); + } else { + int port = alluxioConf.getInt(PropertyKey.WORKER_WEB_PORT); + WorkerNetAddress addr = new WorkerNetAddress().setHost(part).setWebPort(port); + result.add(addr); + } + } + return result; + } + + /** + * Convert a list of worker addresses to string. 
+ * + * @param workers input worker list + * @return the string format + */ + public static String workerAddressListToString(Collection workers) { + return workers.stream().map(WorkerAddressUtils::convertAddressToStringWebPort) + .collect(Collectors.toList()).toString(); + } + + /** + * Converts a set of worker metadata to string. + * + * @param worker a set of workers + * @return the converted string format + */ + public static String workerListToString(Set worker) { + if (worker.isEmpty()) { + return "[]"; + } + // Print on a new line + return "\n" + worker.stream().map(w -> convertAddressToStringWebPort(w.getNetAddress())) + .collect(Collectors.toList()); + } + + /** + * Converts a worker address to string. + * To stay consistent with the command, we print the web port of the worker. + * + * @param address the worker address + * @return the string format + */ + public static String convertAddressToStringWebPort(WorkerNetAddress address) { + return address.getHost() + ":" + address.getWebPort(); + } +} diff --git a/tests/src/test/java/alluxio/client/fs/io/FileInStreamDecommissionIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/io/FileInStreamDecommissionIntegrationTest.java new file mode 100644 index 000000000000..1129aca140d6 --- /dev/null +++ b/tests/src/test/java/alluxio/client/fs/io/FileInStreamDecommissionIntegrationTest.java @@ -0,0 +1,394 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.fs.io; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; + +import alluxio.AlluxioURI; +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.FileSystemContext; +import alluxio.client.file.FileSystemTestUtils; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GrpcUtils; +import alluxio.grpc.OpenFilePOptions; +import alluxio.grpc.ReadPType; +import alluxio.grpc.WritePType; +import alluxio.security.user.TestUserState; +import alluxio.testutils.LocalAlluxioClusterResource; +import alluxio.util.SleepUtils; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.io.PathUtils; +import alluxio.wire.BlockLocation; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class FileInStreamDecommissionIntegrationTest { + private static final int BLOCK_SIZE = 1024 * 1024; + private static final int LENGTH = 2 * BLOCK_SIZE; + private static final int CLIENT_WORKER_LIST_REFRESH_INTERVAL = 2000; // 2s + + @Rule + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = + new LocalAlluxioClusterResource.Builder() + .setNumWorkers(2) + .setProperty(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT, BLOCK_SIZE) 
+ .setProperty(PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL, "2s") + // Disable short circuit + .setProperty(PropertyKey.USER_SHORT_CIRCUIT_ENABLED, false) + .setStartCluster(false) + .build(); + private FileSystem mFileSystem = null; + private CreateFilePOptions mWriteBoth; + private CreateFilePOptions mWriteAlluxio; + private OpenFilePOptions mReadNoCache; + private OpenFilePOptions mReadCachePromote; + private String mTestPath; + private ExecutorService mThreadPool; + + private String mCacheThroughFilePath; + private String mMustCacheFilePath; + + @Rule + public ExpectedException mThrown = ExpectedException.none(); + + @Before + public final void setUp() throws Exception { + mLocalAlluxioClusterResource.start(); + mFileSystem = mLocalAlluxioClusterResource.get().getClient(); + + // Just use the 1st worker to write everything + WorkerNetAddress worker1 = mLocalAlluxioClusterResource.get().getWorkerAddress(); + // For each file, 2 blocks on the same worker so we can use the 1st block's location + // to know which worker to decommission + mWriteBoth = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.CACHE_THROUGH) + .setWorkerLocation(GrpcUtils.toProto(worker1)) + .setRecursive(true).build(); + mWriteAlluxio = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.MUST_CACHE) + .setWorkerLocation(GrpcUtils.toProto(worker1)) + .setRecursive(true).build(); + mReadCachePromote = + OpenFilePOptions.newBuilder().setReadType(ReadPType.CACHE_PROMOTE).build(); + mReadNoCache = OpenFilePOptions.newBuilder().setReadType(ReadPType.NO_CACHE).build(); + mTestPath = PathUtils.uniqPath(); + mCacheThroughFilePath = mTestPath + "/file_BOTH"; + mMustCacheFilePath = mTestPath + "/file_CACHE"; + + // Create files of varying size and write type to later read from + AlluxioURI path0 = new AlluxioURI(mCacheThroughFilePath); + FileSystemTestUtils.createByteFile(mFileSystem, path0, mWriteBoth, LENGTH); + + 
AlluxioURI path1 = new AlluxioURI(mMustCacheFilePath); + FileSystemTestUtils.createByteFile(mFileSystem, path1, mWriteAlluxio, LENGTH); + + mThreadPool = Executors.newFixedThreadPool(1, + ThreadFactoryUtils.build("decommission-worker-%d", true)); + } + + @After + public final void tearDown() throws Exception { + mLocalAlluxioClusterResource.stop(); + mThreadPool.shutdownNow(); + } + + private List getOptionSet() { + List ret = new ArrayList<>(2); + ret.add(mWriteBoth); + ret.add(mWriteAlluxio); + return ret; + } + + @Test + /* + * If a stream is created after the worker is decommissioned, it cannot pick that worker. + * And if the block exists in UFS, the client will use the other worker and read from UFS. + */ + public void readUfsFromUndecommissionedWorker() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + List blockInfos = status.getFileBlockInfos(); + FileBlockInfo block0 = blockInfos.get(0); + BlockLocation loc0 = block0.getBlockInfo().getLocations().get(0); + WorkerNetAddress workerToDecommission = loc0.getWorkerAddress(); + + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + // This stream is able to find the undecommissioned worker and use that to read from UFS + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + byte[] ret = new byte[1024 
* 1024]; // 1MB buffer + // This has created the block stream that reads from the target worker + int readLength = 0; + int value = 0; + + while (value != -1) { + value = is.read(ret); + if (value != -1) { + readLength += value; + } + } + assertEquals(readLength, LENGTH); + is.close(); + + // The blocks are read from the other worker, so there should be cache on the way + URIStatus statusAfterRead = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + assertEquals(2, statusAfterRead.getFileBlockInfos().size()); + List block0Locs = statusAfterRead.getFileBlockInfos().get(0) + .getBlockInfo().getLocations(); + assertEquals(1, block0Locs.size()); + // The block is not on the decommissioned worker, meaning it is cached on the other worker + assertNotEquals(workerToDecommission, block0Locs.get(0).getWorkerAddress()); + + List block1Locs = statusAfterRead.getFileBlockInfos().get(1) + .getBlockInfo().getLocations(); + assertEquals(1, block1Locs.size()); + // The block is not on the decommissioned worker, meaning it is cached on the other worker + assertNotEquals(workerToDecommission, block1Locs.get(0).getWorkerAddress()); + } + + @Test + /* + * If a stream is created after the worker is decommissioned, it cannot pick that worker. + * And if that worker holds the only cache and the block is not in UFS, + * the read will fail. 
+ */ + public void cannotReadCacheFromDecommissionedWorker() throws Exception { + AlluxioURI uri = new AlluxioURI(mMustCacheFilePath); + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + List blockInfos = status.getFileBlockInfos(); + FileBlockInfo block0 = blockInfos.get(0); + BlockLocation loc0 = block0.getBlockInfo().getLocations().get(0); + WorkerNetAddress targetWorker = loc0.getWorkerAddress(); + + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(targetWorker.getHost()).setWorkerWebPort(targetWorker.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + // This stream is able to find the undecommissioned worker and use that to read from UFS + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + // The worker has been decommissioned and the file only exists in that worker + // So the client cannot read + assertThrows(UnavailableException.class, () -> { + int value = is.read(); + }); + is.close(); + } + + @Test + /* + * The target worker is decommissioned while the stream is reading. + * However, the stream does not know the worker list has changed and keeps reading that worker. + * This read should succeed. 
+ */ + public void decommissionWhileReading() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + List blockInfos = status.getFileBlockInfos(); + FileBlockInfo block0 = blockInfos.get(0); + BlockLocation loc0 = block0.getBlockInfo().getLocations().get(0); + WorkerNetAddress targetWorker = loc0.getWorkerAddress(); + + CountDownLatch streamActive = new CountDownLatch(1); + CountDownLatch workerDecommissioned = new CountDownLatch(1); + mThreadPool.submit(() -> { + try { + streamActive.await(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(targetWorker.getHost()).setWorkerWebPort(targetWorker.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + List updatedWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(1, updatedWorkers.size()); + + workerDecommissioned.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + // This stream is able to find the undecommissioned worker and use that to read from UFS + FileInStream is = mFileSystem.openFile(uri, mReadNoCache); + byte[] ret = new byte[1024]; // 1MB buffer + // This has created the block stream that reads from the target worker + int value = 0; + int readLength = 0; + + boolean released = false; + while (value != -1) { + if (readLength > 1024 && !released) { + streamActive.countDown(); + released = true; + + // Wait a bit for the decommission to take effect + // After the 
worker is decommissioned, the stream can successfully complete + workerDecommissioned.await(); + // However, even though the master has refreshed the available worker list + // The stream does not pick another worker until it sees an exception + // So when this resumes, the stream will keep reading the decommissioned worker + // And we want the decommissioned worker to keep serving + } + value = is.read(ret); + if (value != -1) { + readLength += value; + } + } + assertEquals(readLength, LENGTH); + is.close(); + } + + @Test + /* + * When there is an active stream reading from one worker, decommission that worker. + * Then we make the stream wait a bit and realize that worker is no longer available. + * The stream should pick the other available worker in the cluster and read from UFS using that. + */ + public void halfStreamFromAnotherWorker() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List clusterWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, clusterWorkers.size()); + + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + List blockInfos = status.getFileBlockInfos(); + FileBlockInfo block0 = blockInfos.get(0); + BlockLocation loc0 = block0.getBlockInfo().getLocations().get(0); + WorkerNetAddress workerToDecommission = loc0.getWorkerAddress(); + + CountDownLatch streamActive = new CountDownLatch(1); + CountDownLatch workerDecommissioned = new CountDownLatch(1); + mThreadPool.submit(() -> { + try { + streamActive.await(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + 
context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + List updatedWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(1, updatedWorkers.size()); + workerDecommissioned.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + // This stream is able to find the undecommissioned worker and use that to read from UFS + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + byte[] ret = new byte[1024]; // 1MB buffer + // This has created the block stream that reads from the target worker + int value = 0; + int readLength = 0; + + boolean released = false; + while (value != -1) { + // 2 blocks on the same worker, decommission at the end of one BlockStream + // so when the FileStream continues, create the 2nd block stream where there's only one + // usable worker that does not have the block + if (readLength == BLOCK_SIZE && !released) { + streamActive.countDown(); + released = true; + + // Wait a bit for the decommission to take effect + // After the worker is decommissioned, the stream can successfully complete + workerDecommissioned.await(); + + // Wait a bit for the worker list to refresh in the FileSystemContext + SleepUtils.sleepMs(CLIENT_WORKER_LIST_REFRESH_INTERVAL); + // The client realizes the target worker is decommissioned + List usableWorkers = context.getCachedWorkers(); + assertEquals(1, usableWorkers.size()); + // Continue where the usable worker is not the stream target + // The client should be able to find the other worker and read UFS through that + } + value = is.read(ret); + if (value != -1) { + readLength += value; + } + } + assertEquals(readLength, LENGTH); + + // The 2nd block is read from the other worker, so there should be cached on the way + URIStatus statusAfterRead = context.acquireMasterClientResource() + .get().getStatus(uri, GetStatusPOptions.getDefaultInstance()); + FileBlockInfo block1 = statusAfterRead.getFileBlockInfos().get(1); 
+ WorkerNetAddress cachedToWorker = block1.getBlockInfo().getLocations() + .get(0).getWorkerAddress(); + assertNotEquals(cachedToWorker, workerToDecommission); + + is.close(); + } +} diff --git a/tests/src/test/java/alluxio/client/fs/io/FileOutStreamDecommissionIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/io/FileOutStreamDecommissionIntegrationTest.java new file mode 100644 index 000000000000..ae834b56c007 --- /dev/null +++ b/tests/src/test/java/alluxio/client/fs/io/FileOutStreamDecommissionIntegrationTest.java @@ -0,0 +1,501 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.client.fs.io; + +import static org.apache.ratis.util.Preconditions.assertTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; + +import alluxio.AlluxioURI; +import alluxio.client.block.BlockWorkerInfo; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.FileSystemContext; +import alluxio.client.file.URIStatus; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DecommissionWorkerPOptions; +import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GrpcUtils; +import alluxio.grpc.OpenFilePOptions; +import alluxio.grpc.ReadPType; +import alluxio.grpc.WritePType; +import alluxio.security.user.TestUserState; +import alluxio.testutils.LocalAlluxioClusterResource; +import alluxio.util.SleepUtils; +import alluxio.util.ThreadFactoryUtils; +import alluxio.util.io.PathUtils; +import alluxio.wire.BlockLocation; +import alluxio.wire.FileBlockInfo; +import alluxio.wire.WorkerInfo; +import alluxio.wire.WorkerNetAddress; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class FileOutStreamDecommissionIntegrationTest { + private static final int BLOCK_SIZE = 1024 * 1024; + private static final int LENGTH = 2 * BLOCK_SIZE; + private static final int CLIENT_WORKER_LIST_REFRESH_INTERVAL = 2000; // 2s + + @Rule + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = + 
new LocalAlluxioClusterResource.Builder() + .setNumWorkers(2) + .setProperty(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT, BLOCK_SIZE) + .setProperty(PropertyKey.USER_WORKER_LIST_REFRESH_INTERVAL, "2s") + .setProperty(PropertyKey.USER_FILE_WRITE_INIT_MAX_DURATION, "2s") + // Disable short circuit + .setProperty(PropertyKey.USER_SHORT_CIRCUIT_ENABLED, false) + .setStartCluster(false) + .build(); + private FileSystem mFileSystem = null; + private CreateFilePOptions mWriteBoth; + private CreateFilePOptions mWriteAlluxio; + private OpenFilePOptions mReadNoCache; + private OpenFilePOptions mReadCachePromote; + private String mTestPath; + private ExecutorService mThreadPool; + + private String mCacheThroughFilePath; + private String mMustCacheFilePath; + + @Rule + public ExpectedException mThrown = ExpectedException.none(); + + @Before + public final void setUp() throws Exception { + mLocalAlluxioClusterResource.start(); + mFileSystem = mLocalAlluxioClusterResource.get().getClient(); + + // Just use the 1st worker to write everything + WorkerNetAddress worker1 = mLocalAlluxioClusterResource.get().getWorkerAddress(); + // For each file, 2 blocks on the same worker so we can use the 1st block's location + // to know which worker to decommission + mWriteBoth = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.CACHE_THROUGH) + .setWorkerLocation(GrpcUtils.toProto(worker1)) + .setRecursive(true).build(); + mWriteAlluxio = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.MUST_CACHE) + .setWorkerLocation(GrpcUtils.toProto(worker1)) + .setRecursive(true).build(); + mReadCachePromote = + OpenFilePOptions.newBuilder().setReadType(ReadPType.CACHE_PROMOTE).build(); + mReadNoCache = OpenFilePOptions.newBuilder().setReadType(ReadPType.NO_CACHE).build(); + mTestPath = PathUtils.uniqPath(); + mCacheThroughFilePath = mTestPath + "/file_BOTH"; + mMustCacheFilePath = mTestPath + "/file_CACHE"; + mThreadPool = 
Executors.newFixedThreadPool(1, + ThreadFactoryUtils.build("decommission-worker-%d", true)); + } + + @After + public final void tearDown() throws Exception { + mLocalAlluxioClusterResource.stop(); + mThreadPool.shutdownNow(); + } + + private List getOptionSet() { + List ret = new ArrayList<>(2); + ret.add(mWriteBoth); + ret.add(mWriteAlluxio); + return ret; + } + + @Test + /* + * If a stream is created after the worker is decommissioned, it cannot pick that worker. + * And the client will use the other worker to write to UFS. + */ + public void writeUfsFromUndecommissionedWorker() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + // Decommission one worker in the cluster + WorkerNetAddress workerToDecommission = mLocalAlluxioClusterResource.get().getWorkerAddress(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + // Create a stream w/o specifying target + CreateFilePOptions writeOptions = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.CACHE_THROUGH) + .setRecursive(true).build(); + // This stream is able to find the undecommissioned worker and use that to write to UFS + FileOutStream os = mFileSystem.createFile(uri, writeOptions); + byte[] ret = new byte[1024 * 1024]; // 1MB buffer + // This has created the block stream that reads from the target worker + int writeLength = 0; + + while (writeLength < LENGTH) { + // Write whatever in the 
buffer + os.write(ret); + writeLength += ret.length; + } + assertEquals(writeLength, LENGTH); + os.close(); + + // 2 blocks are written successfully + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + assertEquals(LENGTH, status.getLength()); + assertEquals(2, status.getFileBlockInfos().size()); + // The location is on the undecommissioned worker + List block0Locs = status.getFileBlockInfos().get(0) + .getBlockInfo().getLocations(); + assertEquals(1, block0Locs.size()); + assertNotEquals(workerToDecommission, block0Locs.get(0).getWorkerAddress()); + List block1Locs = status.getFileBlockInfos().get(1) + .getBlockInfo().getLocations(); + assertEquals(1, block1Locs.size()); + assertNotEquals(workerToDecommission, block1Locs.get(0).getWorkerAddress()); + + // The file should be able to be read + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + int readLength = 0; + int res = 0; + while (res != -1) { + res = is.read(ret); + if (res != -1) { + readLength += res; + } + } + assertEquals(readLength, LENGTH); + } + + @Test + /* + * If a stream is created after the worker is decommissioned, it cannot pick that worker. + * And if that worker is the only one to pick from, the request fails. 
+ */ + public void cannotWriteFromDecommissionedWorker() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + // Decommission one worker in the cluster + WorkerNetAddress workerToDecommission = mLocalAlluxioClusterResource.get().getWorkerAddress(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + // This stream is able to find the undecommissioned worker and use that to read from UFS + // Create a stream specifying the target + CreateFilePOptions writeOptions = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.CACHE_THROUGH) + .setWorkerLocation(GrpcUtils.toProto(workerToDecommission)) + .setRecursive(true).build(); + // The worker has been decommissioned so the file cannot be written + Exception e = assertThrows(UnavailableException.class, () -> { + FileOutStream os = mFileSystem.createFile(uri, writeOptions); + os.write(7); + os.close(); + }); + assertTrue(e.getMessage().contains(ExceptionMessage.NO_WORKER_AVAILABLE.getMessage())); + } + + @Test + /* + * The target worker is decommissioned while the stream is writing. + * The stream should succeed but there will be no available cache location to the client. 
+ */ + public void decommissionWhileWriting() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List availableWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, availableWorkers.size()); + + WorkerNetAddress workerToDecommission = mLocalAlluxioClusterResource.get().getWorkerAddress(); + CountDownLatch streamActive = new CountDownLatch(1); + CountDownLatch workerDecommissioned = new CountDownLatch(1); + mThreadPool.submit(() -> { + try { + streamActive.await(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + + List updatedWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(1, updatedWorkers.size()); + + workerDecommissioned.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + // Create a stream specifying the target + CreateFilePOptions writeOptions = CreateFilePOptions.newBuilder() + .setBlockSizeBytes(BLOCK_SIZE) + .setWriteType(WritePType.CACHE_THROUGH) + .setWorkerLocation(GrpcUtils.toProto(workerToDecommission)) + .setRecursive(true).build(); + // This stream is able to find the undecommissioned worker and use that to read from UFS + FileOutStream os = mFileSystem.createFile(uri, writeOptions); + byte[] ret = new byte[1024]; + // This has created the block stream that writes with the target worker + int writeLength = 0; + + boolean released = false; + while (writeLength < LENGTH) { + if (writeLength > 1024 && !released) { + streamActive.countDown(); + released = true; + + // Wait a bit for the decommission 
to take effect + // After the worker is decommissioned, the stream can successfully complete + workerDecommissioned.await(); + // However, even though the master has refreshed the available worker list + // The stream does not pick another worker until it sees an exception + // So when this resumes, the stream will keep reading the decommissioned worker + // And we want the decommissioned worker to keep serving + } + os.write(ret); + writeLength += ret.length; + } + assertEquals(writeLength, LENGTH); + os.close(); + + // The worker has been decommissioned so the block locations are all empty + // No cache readable for the client + URIStatus status = context.acquireMasterClientResource().get() + .getStatus(uri, GetStatusPOptions.getDefaultInstance()); + List blockInfos = status.getFileBlockInfos(); + FileBlockInfo block0 = blockInfos.get(0); + assertEquals(0, block0.getBlockInfo().getLocations().size()); + FileBlockInfo block1 = blockInfos.get(1); + assertEquals(0, block1.getBlockInfo().getLocations().size()); + + // The file should be able to be read + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + int readLength = 0; + int res = 0; + while (res != -1) { + res = is.read(ret); + if (res != -1) { + readLength += res; + } + } + assertEquals(readLength, LENGTH); + } + + @Test + /* + * When there is an active stream writing to one worker, decommission that worker. + * Then we make the stream wait a bit and realize that worker is no longer available. 
+ */ + public void halfCacheThroughStreamDecommission() throws Exception { + AlluxioURI uri = new AlluxioURI(mCacheThroughFilePath); + + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List clusterWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, clusterWorkers.size()); + + WorkerNetAddress workerToDecommission = mLocalAlluxioClusterResource.get().getWorkerAddress(); + CountDownLatch streamActive = new CountDownLatch(1); + CountDownLatch workerDecommissioned = new CountDownLatch(1); + mThreadPool.submit(() -> { + try { + streamActive.await(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + List updatedWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(1, updatedWorkers.size()); + workerDecommissioned.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + // This stream is able to find the undecommissioned worker and use that to write to UFS + FileOutStream os = mFileSystem.createFile(uri, mWriteBoth); + byte[] ret = new byte[1024]; + // This has created the block stream that reads from the target worker + int writeLength = 0; + + boolean released = false; + while (writeLength < LENGTH) { + // 2 blocks on the same worker, decommission at the end of one BlockStream + // so when the FileStream continues, create the 2nd block stream where there's only one + // usable worker that does not have the block + if (writeLength == BLOCK_SIZE && !released) { + streamActive.countDown(); + released = true; + + // Wait a bit for the decommission to take effect + // After the worker is decommissioned, 
the stream can successfully complete + workerDecommissioned.await(); + + // Wait a bit for the worker list to refresh in the FileSystemContext + SleepUtils.sleepMs(CLIENT_WORKER_LIST_REFRESH_INTERVAL); + // The client realizes the target worker is decommissioned + List usableWorkers = context.getCachedWorkers(); + assertEquals(1, usableWorkers.size()); + // Continue the stream, the stream is able to keep going using the decommissioned worker + } + os.write(ret); + writeLength += ret.length; + } + assertEquals(writeLength, LENGTH); + os.close(); + + // The worker has been decommissioned so the block locations are all empty + // No cache readable for the client + URIStatus statusAfterRead = context.acquireMasterClientResource() + .get().getStatus(uri, GetStatusPOptions.getDefaultInstance()); + assertEquals(LENGTH, statusAfterRead.getLength()); + assertEquals(2, statusAfterRead.getFileBlockInfos().size()); + assertEquals(0, statusAfterRead.getFileBlockInfos() + .get(0).getBlockInfo().getLocations().size()); + assertEquals(0, statusAfterRead.getFileBlockInfos() + .get(1).getBlockInfo().getLocations().size()); + + // The file should be able to be read + FileInStream is = mFileSystem.openFile(uri, mReadCachePromote); + int readLength = 0; + int res = 0; + while (res != -1) { + res = is.read(ret); + if (res != -1) { + readLength += res; + } + } + assertEquals(readLength, LENGTH); + } + + @Test + /* + * When there is an active stream writing to one worker, decommission that worker. + * Then we make the stream wait a bit and realize that worker is no longer available. + * The stream is not able to continue because the cache target is gone. 
+ */ + public void halfStreamMustCacheDecommission() throws Exception { + AlluxioURI uri = new AlluxioURI(mMustCacheFilePath); + + FileSystemContext context = FileSystemContext + .create(new TestUserState("test", Configuration.global()).getSubject(), + Configuration.global()); + List clusterWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(2, clusterWorkers.size()); + + WorkerNetAddress workerToDecommission = mLocalAlluxioClusterResource.get().getWorkerAddress(); + CountDownLatch streamActive = new CountDownLatch(1); + CountDownLatch workerDecommissioned = new CountDownLatch(1); + mThreadPool.submit(() -> { + try { + streamActive.await(); + DecommissionWorkerPOptions decomOptions = DecommissionWorkerPOptions.newBuilder() + .setWorkerHostname(workerToDecommission.getHost()) + .setWorkerWebPort(workerToDecommission.getWebPort()) + .setCanRegisterAgain(true).build(); + context.acquireBlockMasterClientResource().get().decommissionWorker(decomOptions); + List updatedWorkers = context.acquireBlockMasterClientResource() + .get().getWorkerInfoList(); + assertEquals(1, updatedWorkers.size()); + workerDecommissioned.countDown(); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + // This stream is able to find the undecommissioned worker and use that to write to UFS + FileOutStream os = mFileSystem.createFile(uri, mWriteAlluxio); + byte[] ret = new byte[1024]; + // This has created the block stream that reads from the target worker + int writeLength = 0; + + boolean released = false; + while (writeLength < LENGTH) { + // 2 blocks on the same worker, decommission at the end of one BlockStream + // so when the FileStream continues, create the 2nd block stream where there's only one + // usable worker that does not have the block + if (writeLength == BLOCK_SIZE && !released) { + streamActive.countDown(); + released = true; + + // Wait a bit for the decommission to take effect + // After the worker is decommissioned, the 
stream can successfully complete + workerDecommissioned.await(); + + // Wait a bit for the worker list to refresh in the FileSystemContext + SleepUtils.sleepMs(CLIENT_WORKER_LIST_REFRESH_INTERVAL); + // The client realizes the target worker is decommissioned + List usableWorkers = context.getCachedWorkers(); + assertEquals(1, usableWorkers.size()); + // Continue the stream, the stream is not able to continue because the worker is + // no longer available + assertThrows(IOException.class, () -> { + os.write(ret); + }); + break; + } + os.write(ret); + writeLength += ret.length; + } + os.close(); + + // The worker has been decommissioned so the block locations are all empty + // No cache readable for the client + URIStatus statusAfterRead = context.acquireMasterClientResource() + .get().getStatus(uri, GetStatusPOptions.getDefaultInstance()); + // The stream was not successful so the length is 0 + assertEquals(0, statusAfterRead.getLength()); + } +} From 26257e6f35c57d937f6f5bdee831facc2fcf7e1a Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Fri, 28 Apr 2023 09:41:09 +0800 Subject: [PATCH 252/334] Make capacity command show worker state ### What changes are proposed in this pull request? ``` bin/alluxio fsadmin report capacity Capacity information for all workers: Total Capacity: 10.67GB Tier: MEM Size: 10.67GB Used Capacity: 0B Tier: MEM Size: 0B Used Percentage: 0% Free Percentage: 100% Format is short: true Format is %-16s %-15s %-16s %-13s %s %-16s %-40s Worker Name State Last Heartbeat Storage MEM Version Revision 192.168.3.8 ACTIVE 166 capacity 10.67GB 2.10.0-SNAPSHOT ffcb706497bf47e43d5b2efc90f664c7a3e7014e used 0B (0%) ``` ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. 
### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17325 change-id: cid-598ce3cd88ee00e5981b0de273b27a3ea18d710b --- .../java/alluxio/master}/WorkerState.java | 9 ++-- .../master/block/DefaultBlockMaster.java | 27 +++++++++--- .../master/block/meta/MasterWorkerInfo.java | 1 + .../alluxio/master/block/BlockMasterTest.java | 3 +- .../block/meta/MasterWorkerInfoTest.java | 3 +- .../cli/fsadmin/report/CapacityCommand.java | 19 ++++---- .../fsadmin/report/CapacityCommandTest.java | 43 ++++++++++--------- .../CapacityCommandIntegrationTest.java | 6 ++- 8 files changed, 68 insertions(+), 43 deletions(-) rename core/{server/master/src/main/java/alluxio/master/block/meta => common/src/main/java/alluxio/master}/WorkerState.java (86%) diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java b/core/common/src/main/java/alluxio/master/WorkerState.java similarity index 86% rename from core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java rename to core/common/src/main/java/alluxio/master/WorkerState.java index 0f5d2327ba28..8b4572fa7cde 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/WorkerState.java +++ b/core/common/src/main/java/alluxio/master/WorkerState.java @@ -9,15 +9,16 @@ * See the NOTICE file distributed with this work for information regarding copyright ownership. */ -package alluxio.master.block.meta; +package alluxio.master; /*** * The worker state maintained by master. 
*/ public enum WorkerState { - LIVE("In Service"), - LOST("Out of Service"), - DECOMMISSIONED("Decommissioned"); + LIVE("ACTIVE"), + LOST("LOST"), + DECOMMISSIONED("Decommissioned"), + DISABLED("Disabled"); private final String mState; WorkerState(String s) { diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 73d0f6c83180..0a87d0086797 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -49,9 +49,9 @@ import alluxio.heartbeat.HeartbeatThread; import alluxio.master.CoreMaster; import alluxio.master.CoreMasterContext; +import alluxio.master.WorkerState; import alluxio.master.block.meta.MasterWorkerInfo; import alluxio.master.block.meta.WorkerMetaLockSection; -import alluxio.master.block.meta.WorkerState; import alluxio.master.journal.JournalContext; import alluxio.master.journal.SingleEntryJournaled; import alluxio.master.journal.checkpoint.CheckpointName; @@ -677,6 +677,8 @@ public List getWorkerInfoList() throws UnavailableException { } private List constructWorkerInfoList() { + // TODO(jiacheng): investigate why this cache is refreshed so many times by the + // alluxio.master.scheduler.Scheduler L239 List workerInfoList = new ArrayList<>(mWorkers.size()); for (MasterWorkerInfo worker : mWorkers) { // extractWorkerInfo handles the locking internally @@ -791,16 +793,31 @@ public List getWorkerReport(GetWorkerReportOptions options) + selectedDecommissionedWorkers.size()); for (MasterWorkerInfo worker : selectedLiveWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LIVE)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + 
WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LIVE)); + } } for (MasterWorkerInfo worker : selectedLostWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), WorkerState.LOST)); + } } for (MasterWorkerInfo worker : selectedDecommissionedWorkers) { // extractWorkerInfo handles the locking internally - workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), - WorkerState.DECOMMISSIONED)); + if (mRejectWorkers.contains(worker.getWorkerAddress())) { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DISABLED)); + } else { + workerInfoList.add(extractWorkerInfo(worker, options.getFieldRange(), + WorkerState.DECOMMISSIONED)); + } } return workerInfoList; } diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java index b2d08a66fcb3..d4dae4783b1d 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java +++ b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java @@ -17,6 +17,7 @@ import alluxio.client.block.options.GetWorkerReportOptions.WorkerInfoField; import alluxio.grpc.BuildVersion; import alluxio.grpc.StorageList; +import alluxio.master.WorkerState; import alluxio.master.block.DefaultBlockMaster; import alluxio.resource.LockResource; import alluxio.util.CommonUtils; diff --git a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java index 
71a200c1010f..d1cbb7a2a686 100644 --- a/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/BlockMasterTest.java @@ -42,8 +42,8 @@ import alluxio.master.CoreMasterContext; import alluxio.master.MasterRegistry; import alluxio.master.MasterTestUtils; +import alluxio.master.WorkerState; import alluxio.master.block.meta.MasterWorkerInfo; -import alluxio.master.block.meta.WorkerState; import alluxio.master.journal.JournalSystem; import alluxio.master.journal.noop.NoopJournalSystem; import alluxio.master.metrics.MetricsMaster; @@ -113,7 +113,6 @@ public class BlockMasterTest { private MasterRegistry mRegistry; private ManualClock mClock; private ExecutorService mExecutorService; - private ExecutorService mClientExecutorService; private MetricsMaster mMetricsMaster; private List mMetrics; diff --git a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java index c855f79fb6c7..e5bc773cce51 100644 --- a/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java +++ b/core/server/master/src/test/java/alluxio/master/block/meta/MasterWorkerInfoTest.java @@ -19,6 +19,7 @@ import alluxio.DefaultStorageTierAssoc; import alluxio.StorageTierAssoc; import alluxio.client.block.options.GetWorkerReportOptions; +import alluxio.master.WorkerState; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -142,7 +143,7 @@ public void workerInfoGeneration() { WorkerState.LIVE); assertEquals(mInfo.getId(), workerInfo.getId()); assertEquals(mInfo.getWorkerAddress(), workerInfo.getAddress()); - assertEquals("In Service", workerInfo.getState()); + assertEquals(WorkerState.LIVE.toString(), workerInfo.getState()); assertEquals(mInfo.getCapacityBytes(), workerInfo.getCapacityBytes()); assertEquals(mInfo.getUsedBytes(), workerInfo.getUsedBytes()); 
assertEquals(mInfo.getStartTime(), workerInfo.getStartTimeMs()); diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java index 1d5699acbf41..908e705406e9 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java @@ -301,6 +301,7 @@ private void printWorkerInfo(List workerInfoList) { if (mCapacityTierInfoMap.size() == 0) { return; } else if (mCapacityTierInfoMap.size() == 1) { + // TODO(jiacheng): test BOTH long and short output // Do not print Total value when only one tier exists printShortWorkerInfo(workerInfoList); return; @@ -309,7 +310,8 @@ private void printWorkerInfo(List workerInfoList) { String tiersInfo = String.format(Strings.repeat("%-14s", tiers.size()), tiers.toArray()); String longInfoFormat = getInfoFormat(workerInfoList, false); print(String.format("%n" + longInfoFormat, - "Worker Name", "Last Heartbeat", "Storage", "Total", tiersInfo, "Version", "Revision")); + "Worker Name", "State", "Last Heartbeat", "Storage", "Total", tiersInfo, + "Version", "Revision")); for (WorkerInfo info : workerInfoList) { String workerName = info.getAddress().getHost(); @@ -326,10 +328,11 @@ private void printWorkerInfo(List workerInfoList) { String capacityTierInfo = getWorkerFormattedTierValues(mCapacityTierInfoMap, workerName); String usedTierInfo = getWorkerFormattedTierValues(mUsedTierInfoMap, workerName); - print(String.format(longInfoFormat, workerName, info.getLastContactSec(), "capacity", + print(String.format(longInfoFormat, workerName, info.getState(), + info.getLastContactSec(), "capacity", FormatUtils.getSizeFromBytes(capacityBytes), capacityTierInfo, info.getVersion(), info.getRevision())); - print(String.format(longInfoFormat, "", "", "used", + print(String.format(longInfoFormat, "", "", "", "used", FormatUtils.getSizeFromBytes(usedBytes) + usedPercentageInfo, 
usedTierInfo, "", "")); } @@ -344,7 +347,7 @@ private void printShortWorkerInfo(List workerInfoList) { String tier = String.format("%-16s", mCapacityTierInfoMap.firstKey()); String shortInfoFormat = getInfoFormat(workerInfoList, true); print(String.format("%n" + shortInfoFormat, - "Worker Name", "Last Heartbeat", "Storage", tier, "Version", "Revision")); + "Worker Name", "State", "Last Heartbeat", "Storage", tier, "Version", "Revision")); for (WorkerInfo info : workerInfoList) { long capacityBytes = info.getCapacityBytes(); @@ -355,11 +358,11 @@ private void printShortWorkerInfo(List workerInfoList) { int usedPercentage = (int) (100L * usedBytes / capacityBytes); usedPercentageInfo = String.format(" (%s%%)", usedPercentage); } - print(String.format(shortInfoFormat, info.getAddress().getHost(), + print(String.format(shortInfoFormat, info.getAddress().getHost(), info.getState(), info.getLastContactSec(), "capacity", String.format("%-16s", FormatUtils.getSizeFromBytes(capacityBytes)), info.getVersion(), info.getRevision())); - print(String.format(shortInfoFormat, "", "", "used", + print(String.format(shortInfoFormat, "", "", "", "used", String.format("%-16s", FormatUtils.getSizeFromBytes(usedBytes) + usedPercentageInfo), "", "")); } @@ -380,9 +383,9 @@ private String getInfoFormat(List workerInfoList, boolean isShort) { firstIndent = maxWorkerNameLength + 5; } if (isShort) { - return "%-" + firstIndent + "s %-16s %-13s %s %-16s %-40s"; + return "%-" + firstIndent + "s %-15s %-16s %-13s %s %-16s %-40s"; } - return "%-" + firstIndent + "s %-16s %-13s %-16s %s %-16s %-40s"; + return "%-" + firstIndent + "s %-15s %-16s %-13s %-16s %s %-16s %-40s"; } /** diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java index 129fc67a0095..d4aa63800ffe 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java +++ 
b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java @@ -15,6 +15,7 @@ import alluxio.client.block.BlockMasterClient; import alluxio.client.block.options.GetWorkerReportOptions; import alluxio.conf.Configuration; +import alluxio.master.WorkerState; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -73,11 +74,11 @@ public void longCapacity() throws IOException { " Used Percentage: 34%", " Free Percentage: 66%", "", - "Worker Name Last Heartbeat Storage Total MEM SSD HDD DOM RAM Version Revision ", - "216.239.33.96 542 capacity 18.63GB 4768.37MB 4768.37MB - 9.31GB - 2.10.0-SNAPSHOT 0123456789abcdef0123456789abcdef01234567", - " used 953.67MB (5%) 190.73MB 286.10MB - 476.84MB - ", - "64.68.90.1 3123 capacity 11.18GB 3814.70MB - 1907.35MB - 5.59GB 2.9.3 0123456789012345678901234567890123456789", - " used 9.31GB (83%) 2861.02MB - 1907.35MB - 4768.37MB "); + "Worker Name State Last Heartbeat Storage Total MEM SSD HDD DOM RAM Version Revision ", + "216.239.33.96 ACTIVE 542 capacity 18.63GB 4768.37MB 4768.37MB - 9.31GB - 2.10.0-SNAPSHOT 0123456789abcdef0123456789abcdef01234567", + " used 953.67MB (5%) 190.73MB 286.10MB - 476.84MB - ", + "64.68.90.1 ACTIVE 3123 capacity 11.18GB 3814.70MB - 1907.35MB - 5.59GB 2.9.3 0123456789012345678901234567890123456789", + " used 9.31GB (83%) 2861.02MB - 1907.35MB - 4768.37MB "); // CHECKSTYLE.ON: LineLengthExceed List testOutput = Arrays.asList(output.split("\n")); Assert.assertThat(testOutput, @@ -107,11 +108,11 @@ public void shortCapacity() throws IOException { " Used Percentage: 34%", " Free Percentage: 66%", "", - "Worker Name Last Heartbeat Storage RAM Version Revision ", - "215.42.95.24 953 capacity 9.31GB 2.2.4 000111222333444555666777888999aaabbbcccd", - " used 476.84MB (5%) ", - "29.53.5.124 6424122 capacity 5.59GB 2.2.3 00112233445566778899aabbccddeeff00112233", - " used 4768.37MB (83%) "); + "Worker Name State Last Heartbeat Storage RAM Version Revision ", + "215.42.95.24 ACTIVE 953 
capacity 9.31GB 2.2.4 000111222333444555666777888999aaabbbcccd", + " used 476.84MB (5%) ", + "29.53.5.124 LOST 6424122 capacity 5.59GB 2.2.3 00112233445566778899aabbccddeeff00112233", + " used 4768.37MB (83%) "); List testOutput = Arrays.asList(output.split("\n")); Assert.assertThat(testOutput, IsIterableContainingInOrder.contains(expectedOutput.toArray())); @@ -145,11 +146,11 @@ public void longWorkerNameCapacity() throws IOException { " Used Percentage: 34%", " Free Percentage: 66%", "", - "Worker Name Last Heartbeat Storage Total MEM SSD HDD Version Revision ", - "org.apache.hdp1 681 capacity 1907.35MB 572.20MB 572.20MB - 2.10.0-rc1 abababababababababababababababababababab", - " used 95.37MB (5%) 19.07MB 28.61MB - ", - "org.alluxio.long.host1 6211 capacity 1144.41MB 572.20MB - 190.73MB 2.10.0-rc2 0101010101010101010101010101010101010101", - " used 953.67MB (83%) 286.10MB - 190.73MB "); + "Worker Name State Last Heartbeat Storage Total MEM SSD HDD Version Revision ", + "org.apache.hdp1 ACTIVE 681 capacity 1907.35MB 572.20MB 572.20MB - 2.10.0-rc1 abababababababababababababababababababab", + " used 95.37MB (5%) 19.07MB 28.61MB - ", + "org.alluxio.long.host1 ACTIVE 6211 capacity 1144.41MB 572.20MB - 190.73MB 2.10.0-rc2 0101010101010101010101010101010101010101", + " used 953.67MB (83%) 286.10MB - 190.73MB "); // CHECKSTYLE.ON: LineLengthExceed List testOutput = Arrays.asList(output.split("\n")); @@ -178,7 +179,7 @@ private List prepareLongInfoList() { .setId(1) .setLastContactSec(3123) .setStartTimeMs(1331231121212L) - .setState("In Service") + .setState(WorkerState.LIVE.toString()) .setUsedBytes(10000000000L) .setUsedBytesOnTiers(usedBytesOnTiersOne) .setVersion("2.9.3") @@ -199,7 +200,7 @@ private List prepareLongInfoList() { .setId(2) .setLastContactSec(542) .setStartTimeMs(1131231121212L) - .setState("In Service") + .setState(WorkerState.LIVE.toString()) .setUsedBytes(1000000000L) .setUsedBytesOnTiers(usedBytesOnTiersSec) .setVersion("2.10.0-SNAPSHOT") @@ -226,7 
+227,7 @@ private List prepareShortInfoList() { .setId(1) .setLastContactSec(6424122) .setStartTimeMs(19365332L) - .setState("Out of Service") + .setState(WorkerState.LOST.toString()) .setUsedBytes(5000000000L) .setUsedBytesOnTiers(usedBytesOnTiersOne) .setVersion("2.2.3") @@ -243,7 +244,7 @@ private List prepareShortInfoList() { .setId(2) .setLastContactSec(953) .setStartTimeMs(112495222L) - .setState("In Service") + .setState(WorkerState.LIVE.toString()) .setUsedBytes(500000000L) .setUsedBytesOnTiers(usedBytesOnTiersSec) .setVersion("2.2.4") @@ -272,7 +273,7 @@ private List prepareLongWorkerNameInfoList() { .setId(1) .setLastContactSec(6211) .setStartTimeMs(1529222699127L) - .setState("In Service") + .setState(WorkerState.LIVE.toString()) .setUsedBytes(1000000000L) .setUsedBytesOnTiers(usedBytesOnTiersOne) .setVersion("2.10.0-rc2") @@ -291,7 +292,7 @@ private List prepareLongWorkerNameInfoList() { .setId(2) .setLastContactSec(681) .setStartTimeMs(1529222699127L) - .setState("In Service") + .setState(WorkerState.LIVE.toString()) .setUsedBytes(100000000L) .setUsedBytesOnTiers(usedBytesOnTiersSec) .setVersion("2.10.0-rc1") diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java index 2096f672d062..5014d3814d12 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java @@ -39,7 +39,8 @@ public void allCapacity() { Assert.assertEquals(" Free Percentage: 100%", lines[6]); Assert.assertEquals("", lines[7]); Assert.assertTrue(lines[8].matches( - "Worker Name {6,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + "Worker Name {6,}State {11,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + Assert.assertTrue(lines[9].contains("ACTIVE")); 
Assert.assertTrue(lines[9].contains("capacity " + size)); Assert.assertTrue(lines[10].contains("used 0B (0%)")); } @@ -68,7 +69,8 @@ public void liveCapacity() { Assert.assertEquals(" Free Percentage: 100%", lines[6]); Assert.assertEquals("", lines[7]); Assert.assertTrue(lines[8].matches( - "Worker Name {6,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + "Worker Name {6,}State {11,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); + Assert.assertTrue(lines[9].contains("ACTIVE")); Assert.assertTrue(lines[9].contains("capacity " + size)); Assert.assertTrue(lines[10].contains("used 0B (0%)")); } From 7a378dcf8558c41db2c62ae3818cea7e22c2afcb Mon Sep 17 00:00:00 2001 From: Xinran Dong <81548653+007DXR@users.noreply.github.com> Date: Fri, 28 Apr 2023 16:49:50 +0800 Subject: [PATCH 253/334] Reduce redundant calls in getObject of S3 API the same modification as #16655 pr-link: Alluxio/alluxio#17356 change-id: cid-a410d4d5645b6fd24a89c145b22c33eb22b7b1a8 --- .../proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 78450c296eb6..671a11c49e86 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -28,6 +28,7 @@ import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; +import alluxio.grpc.OpenFilePOptions; import alluxio.grpc.PMode; import alluxio.grpc.RenamePOptions; import alluxio.grpc.S3SyntaxOptions; @@ -311,7 +312,7 @@ public Response continueTask() { mOPType.name(), user, mHandler.getBucket(), mHandler.getObject())) { try { URIStatus status = userFs.getStatus(objectUri); - FileInStream is = userFs.openFile(objectUri); + FileInStream is = userFs.openFile(status, 
OpenFilePOptions.getDefaultInstance()); S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); RangeFileInStream ris = RangeFileInStream.Factory.create( is, status.getLength(), s3Range); From 1ad9b4e30ad5685830c28e4ad0b7f9bd9118c8ee Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Sat, 29 Apr 2023 10:11:15 +0800 Subject: [PATCH 254/334] Fix the Eof error when parse the Complete MPU ### What changes are proposed in this pull request? Convert the Eof error to a standard error code when parse the Complete MPU pr-link: Alluxio/alluxio#17351 change-id: cid-565ccb6d766df536cc8a6d4fc22343b59169a41b --- .../s3/CompleteMultipartUploadHandler.java | 7 +++++++ .../java/alluxio/proxy/s3/S3ObjectTask.java | 7 +++++++ .../client/rest/S3ClientRestApiTest.java | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java index 05f3bcf7edfb..acbc75a6fdeb 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/CompleteMultipartUploadHandler.java @@ -20,6 +20,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; +import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.Bits; import alluxio.grpc.CreateFilePOptions; import alluxio.grpc.DeletePOptions; @@ -33,6 +34,7 @@ import alluxio.web.ProxyWebServer; import com.codahale.metrics.Timer; +import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.base.Stopwatch; @@ -376,6 +378,11 @@ public CompleteMultipartUploadRequest parseCompleteMultipartUploadRequest(String if (cause instanceof S3Exception) { 
throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath); } + if (e instanceof JsonParseException) { + throw new S3Exception( + new InvalidArgumentException("Failed parsing CompleteMultipartUploadRequest."), + objectPath, S3ErrorCode.INVALID_ARGUMENT); + } throw S3RestUtils.toObjectS3Exception(e, objectPath); } return request; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index 671a11c49e86..a1006d622d8d 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -24,6 +24,7 @@ import alluxio.exception.DirectoryNotEmptyException; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.status.InvalidArgumentException; import alluxio.grpc.Bits; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -41,6 +42,7 @@ import alluxio.web.ProxyWebServer; import com.codahale.metrics.Timer; +import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.dataformat.xml.XmlMapper; import com.google.common.base.Preconditions; import com.google.common.io.BaseEncoding; @@ -1188,6 +1190,11 @@ public CompleteMultipartUploadRequest parseCompleteMultipartUploadRequest(String if (cause instanceof S3Exception) { throw S3RestUtils.toObjectS3Exception((S3Exception) cause, objectPath); } + if (e instanceof JsonParseException) { + throw new S3Exception( + new InvalidArgumentException("Failed parsing CompleteMultipartUploadRequest."), + objectPath, S3ErrorCode.INVALID_ARGUMENT); + } throw S3RestUtils.toObjectS3Exception(e, objectPath); } return request; diff --git a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java index 091ed6918a34..9e74d7e88ef4 100644 --- 
a/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java +++ b/tests/src/test/java/alluxio/client/rest/S3ClientRestApiTest.java @@ -1727,6 +1727,24 @@ public void duplicateMultipartUpload() throws Exception { Assert.assertEquals(S3ErrorCode.Name.NO_SUCH_UPLOAD, response.getCode()); } + @Test + public void completeMultipartUploadWithInvalidArgument() throws Exception { + final String bucketName = "bucket"; + createBucketRestCall(bucketName); + + final String objectName = "object"; + String objectKey = bucketName + AlluxioURI.SEPARATOR + objectName; + + // Initiate the multipart upload. + String result = initiateMultipartUploadRestCall(objectKey); + InitiateMultipartUploadResult multipartUploadResult = + XML_MAPPER.readValue(result, InitiateMultipartUploadResult.class); + final String uploadId = multipartUploadResult.getUploadId(); + TestCase testCase = getCompleteMultipartUploadReadCallTestCase(objectKey, uploadId, null); + HttpURLConnection connection = testCase.execute(); + Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), connection.getResponseCode()); + } + @Test @Ignore public void completeMultipartUploadSpecifyParts() throws Exception { From f575e4fab5ccd15ca29f2565b294679adc945c28 Mon Sep 17 00:00:00 2001 From: Kai Date: Sat, 6 May 2023 11:07:12 +0800 Subject: [PATCH 255/334] Fix the active rpc metrics ### What changes are proposed in this pull request? mentioned in #16629 ### Why are the changes needed? The `Cluster.ActiveRpcReadCount` metric could get an impossible value like a negative number. The main reason is that the request handling doesn't cover all situations. ### Does this PR introduce any user facing changes? 
Nope pr-link: Alluxio/alluxio#17234 change-id: cid-f7d28cbf57ef2212e4b8d300f8a18300b593b213 --- .../alluxio/worker/grpc/BlockReadHandler.java | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java index bf0cf4c5603e..af8310e262bb 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/BlockReadHandler.java @@ -462,28 +462,29 @@ private void runInternal() { } continue; } - if (error != null) { + if (eof || cancel || error != null) { try { completeRequest(mContext); } catch (Exception e) { - LOG.error("Failed to close the request.", e); - } - replyError(error); - } else if (eof || cancel) { - try { - completeRequest(mContext); - } catch (Exception e) { - LogUtils.warnWithException(LOG, "Exception occurred while completing read request, " - + "EOF/CANCEL sessionId: {}. {}", mContext.getRequest().getSessionId(), - mContext.getRequest(), e); - setError(new Error(AlluxioStatusException.fromThrowable(e), true)); + if (error != null) { + LOG.error("Failed to close the request.", e); + } else { + LogUtils.warnWithException(LOG, "Exception occurred while completing read request, " + + "EOF/CANCEL sessionId: {}. 
{}", mContext.getRequest().getSessionId(), + mContext.getRequest(), e); + error = new Error(AlluxioStatusException.fromThrowable(e), true); + } } - if (eof) { + if (error != null) { + replyError(error); + } else if (eof) { replyEof(); - } else { + } else if (cancel) { replyCancel(); } } + // Leave `!mResponse.isReady() && tooManyPendingChunks()` unhandled + // since the reader is not finished in that case and needs more rounds } /** From 8a4e579efb32b7468378714f4bf64188bdac1c91 Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Mon, 8 May 2023 10:15:51 +0800 Subject: [PATCH 256/334] Support range copy in copyObject and copyPart ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17390 change-id: cid-30d303cb559a315f334cdf880d4dde99ed7779c6 --- .../src/main/java/alluxio/proxy/s3/S3Constants.java | 1 + .../src/main/java/alluxio/proxy/s3/S3ObjectTask.java | 12 +++++++++++- .../java/alluxio/proxy/s3/S3RestServiceHandler.java | 8 +++++++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java index 11cfc9695d1e..b94cffb6f289 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Constants.java @@ -41,6 +41,7 @@ public final class S3Constants { // AWS headers public static final String S3_ACL_HEADER = "x-amz-acl"; public static final String S3_COPY_SOURCE_HEADER = "x-amz-copy-source"; + public static final String S3_COPY_SOURCE_RANGE = "x-amz-copy-source-range"; public static final String S3_ETAG_HEADER = "ETAG"; public static final String S3_METADATA_DIRECTIVE_HEADER = "x-amz-metadata-directive"; diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java index a1006d622d8d..6c00fc704bee 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3ObjectTask.java @@ -667,11 +667,21 @@ public String copyObject(FileSystem userFs, S3AuditContext auditContext, throw new S3Exception("Copying an object to itself invalid.", targetPath, S3ErrorCode.INVALID_REQUEST); } + URIStatus status; + try { + status = userFs.getStatus(new AlluxioURI(sourcePath)); + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, targetPath, auditContext); + } + final String range = mHandler.getHeaderOrDefault(S3Constants.S3_COPY_SOURCE_RANGE, null); + S3RangeSpec s3Range = S3RangeSpec.Factory.create(range); try (FileInStream in = userFs.openFile(new 
AlluxioURI(sourcePath)); + RangeFileInStream ris = RangeFileInStream.Factory.create(in, status.getLength(), + s3Range); FileOutStream out = userFs.createFile(objectUri, copyFilePOption)) { MessageDigest md5 = MessageDigest.getInstance("MD5"); try (DigestOutputStream digestOut = new DigestOutputStream(out, md5)) { - IOUtils.copyLarge(in, digestOut, new byte[8 * Constants.MB]); + IOUtils.copyLarge(ris, digestOut, new byte[8 * Constants.MB]); byte[] digest = md5.digest(); String entityTag = Hex.encodeHexString(digest); // persist the ETag via xAttr diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 3a4a28d65355..7d7968eefdfa 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -663,6 +663,7 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, * Uploads an object or part of an object in multipart upload. 
* @param contentMD5 the optional Base64 encoded 128-bit MD5 digest of the object * @param copySourceParam the URL-encoded source path to copy the new file from + * @param copySourceRange the http range header * @param decodedLength the length of the content when in aws-chunked encoding * @param contentLength the total length of the request body * @param contentTypeParam the content type of the request body @@ -685,6 +686,8 @@ public Response deleteBucket(@PathParam("bucket") final String bucket, public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final String contentMD5, @HeaderParam(S3Constants.S3_COPY_SOURCE_HEADER) final String copySourceParam, + @HeaderParam(S3Constants.S3_COPY_SOURCE_RANGE) + final String copySourceRange, @HeaderParam("x-amz-decoded-content-length") final String decodedLength, @HeaderParam(S3Constants.S3_METADATA_DIRECTIVE_HEADER) @@ -906,6 +909,7 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin } else { // CopyObject or UploadPartCopy String copySource = !copySourceParam.startsWith(AlluxioURI.SEPARATOR) ? 
AlluxioURI.SEPARATOR + copySourceParam : copySourceParam; + S3RangeSpec s3Range = S3RangeSpec.Factory.create(copySourceRange); try { copySource = URLDecoder.decode(copySource, "UTF-8"); } catch (UnsupportedEncodingException ex) { @@ -968,10 +972,12 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin objectPath, S3ErrorCode.INVALID_REQUEST); } try (FileInStream in = userFs.openFile(new AlluxioURI(copySource)); + RangeFileInStream ris = RangeFileInStream.Factory.create(in, status.getLength(), + s3Range); FileOutStream out = userFs.createFile(objectUri, copyFilePOptionsBuilder.build())) { MessageDigest md5 = MessageDigest.getInstance("MD5"); try (DigestOutputStream digestOut = new DigestOutputStream(out, md5)) { - IOUtils.copyLarge(in, digestOut, new byte[8 * Constants.MB]); + IOUtils.copyLarge(ris, digestOut, new byte[8 * Constants.MB]); byte[] digest = md5.digest(); String entityTag = Hex.encodeHexString(digest); // persist the ETag via xAttr From 912cf525cc48914cc9d2176ec8a0ef5e782342e4 Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Mon, 8 May 2023 15:11:39 +0800 Subject: [PATCH 257/334] [DOCFIX] Add docs for rolling upgrade and decommission worker ### What changes are proposed in this pull request? This is the doc update from a few recent commits: 1. https://github.com/Alluxio/alluxio/commit/aee3c5cb960b2e472f4fcdb86e731ca3b91d2f8c 2. https://github.com/Alluxio/alluxio/commit/9a4e154e7293667572fec25c63136462fb58a345 3. https://github.com/Alluxio/alluxio/commit/dbac084c1e1d89c1e9253da036746e2df9e1ef14 4. https://github.com/Alluxio/alluxio/commit/8da59539201cff376c33256846bd565d976a148e 5. https://github.com/Alluxio/alluxio/commit/141ee0e567a4c6c60907f85ea66b828704bd761d 6. https://github.com/Alluxio/alluxio/commit/26257e6f35c57d937f6f5bdee831facc2fcf7e1a In short, a few improvements have been added that the admin can now see the version/revision of the running Alluxio components in the cluster. 
A few commands were also added to help with gracefully decommissioning/restarting workers in the cluster. ### Why are the changes needed? This doc change helps users understand how to utilize those features. pr-link: Alluxio/alluxio#17381 change-id: cid-1d45c1750f84ac94d3fa79b9bbe0f0f328f7cb45 --- docs/en/administration/Upgrade.md | 144 +++++++++++++++++++++++++++++ docs/en/operation/Admin-CLI.md | 147 +++++++++++++++++++++++++++++- 2 files changed, 287 insertions(+), 4 deletions(-) diff --git a/docs/en/administration/Upgrade.md b/docs/en/administration/Upgrade.md index 4d35aa5fd701..ff031df59ac4 100644 --- a/docs/en/administration/Upgrade.md +++ b/docs/en/administration/Upgrade.md @@ -87,6 +87,150 @@ $ ./bin/alluxio-start.sh all ``` 5. If you have updated the Alluxio client jar for an application, restart that application to use the new Alluxio client jar. +### Rolling upgrade/restart masters + +When the cluster is running in high-availability mode (running multiple Alluxio masters), if the admin wants to restart all masters +in the cluster, it should be done in a rolling restart fashion to minimize service unavailable time. +The service should only be unavailable during primary master failover, once there is a primary master in HA, +restarting standby masters will not interrupt the service. + +If the HA is on Embeddded Journal (using Raft), this is an example of how to perform rolling upgrade: +```shell +# First check all master nodes in the cluster +$ ./bin/alluxio fsadmin report +... 
+Raft journal addresses: + master-0:19200 + master-1:19200 + master-2:19200 +Master Address State Version REVISION +master-0:19998 PRIMARY alluxio-2.9.0 abcde +master-1:19998 STANDBY alluxio-2.9.0 abcde +master-2:19998 STANDBY alluxio-2.9.0 abcde + +# Pick one standby master and restart it using the higher version +$ ssh master-1 +$ bin/alluxio-start.sh master + +# Wait for that master to join the quorum and observe it is using the higher version +$ ./bin/alluxio fsadmin report +... +Raft journal addresses: + master-0:19200 + master-1:19200 + master-2:19200 +Master Address State Version REVISION +master-0:19998 PRIMARY alluxio-2.9.0 abcde +master-1:19998 STANDBY alluxio-2.9.1 hijkl +master-2:19998 STANDBY alluxio-2.9.0 abcde + +# Do the same for the other standby master master-2 + +# Manually failover the primary to one upgraded standby master, now master-0 becomes standby +$ ./bin/alluxio fsadmin journal quorum elect -address master-1:19200 + +# Restart master-0 with the higher version and wait for it to re-join the quorum +# Then you should observe all masters are on the higher version +$ ./bin/alluxio fsadmin report +... +Raft journal addresses: + master-0:19200 + master-1:19200 + master-2:19200 +Master Address State Version REVISION +master-0:19998 STANDBY alluxio-2.9.1 hijkl +master-1:19998 PRIMARY alluxio-2.9.1 hijkl +master-2:19998 STANDBY alluxio-2.9.1 hijkl + +# Wait for all workers to register with the new primary, and run tests to validate the service +$ bin/alluxio runTests +``` + +Similarly, if the HA is on UFS Journal (using ZooKeeper), the admin can restart masters one by one in the same order. +The only difference is there is no command to manually trigger a primary master failover. The admin can +directly kill the primary master process; after a brief timeout, one standby master will realize and become the new primary. 
+ +### Rolling upgrade/restart workers + +If the admin wants to restart workers without interrupting ongoing service, there are now ways to rolling restart +all workers without failing ongoing I/O requests. Typically, we want to restart workers to apply configuration changes, +or to upgrade to a newer version. + +A typical workflow of rolling upgrade workers looks as follows: +```shell +# First check all worker nodes in the cluster +$ ./bin/alluxio fsadmin report capacity +... +Worker Name State Last Heartbeat Storage MEM Version Revision +data-worker-1 ACTIVE 1 capacity 10.67GB 2.9.0 abcde + used 0B (0%) +data-worker-0 ACTIVE 2 capacity 10.67GB 2.9.0 abcde + used 0B (0%) +data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde + used 0B (0%) +... + +# Pick a batch of workers to decommission, e.g. this batch is 2 workers +$ ./bin/alluxio fsadmin decommissionWorker -a data-worker-0,data-worker-1 -w 5m +Decommissioning worker data-worker-0:30000 +Set worker data-worker-0:30000 decommissioned on master +Decommissioning worker data-worker-1:30000 +Set worker data-worker-1:30000 decommissioned on master +Sent decommission messages to the master, 0 failed and 2 succeeded +Failed ones: [] +Clients take alluxio.user.worker.list.refresh.interval=2min to be updated on the new worker list so this command will block for the same amount of time to ensure the update propagates to clients in the cluster. +Verifying the decommission has taken effect by listing all available workers on the master +Now on master the available workers are: [data-worker-2,data-worker-3,...] +Polling status from worker data-worker-0:30000 +Polling status from worker data-worker-1:30000 +... +There is no operation on worker data-worker-0:30000 for 20 times in a row. Worker is considered safe to stop. +Polling status from worker data-worker-1:30000 +There is no operation on worker data-worker-1:30000 for 20 times in a row. Worker is considered safe to stop. 
+Waited 3 minutes for workers to be idle +All workers are successfully decommissioned and now idle. Safe to kill or restart this batch of workers now. + +# Now you will be able to observe those workers' state have changed from ACTIVE to DECOMMISSIONED. +$ ./bin/alluxio fsadmin report capacity +... +Worker Name State Last Heartbeat Storage MEM Version Revision +data-worker-1 DECOMMISSIONED 1 capacity 10.67GB 2.9.0 abcde + used 0B (0%) +data-worker-0 DECOMMISSIONED 2 capacity 10.67GB 2.9.0 abcde + used 0B (0%) +data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde + used 0B (0%) + +# Then you can restart the decommissioned workers. The workers will start normally and join the cluster. +$ ssh data-worker-0 +$ ./bin/alluxio-start.sh worker +... + +# Now you will be able to observe those workers become ACTIVE again and have a higher version +$ ./bin/alluxio fsadmin report capacity +... +Worker Name State Last Heartbeat Storage MEM Version Revision +data-worker-1 ACTIVE 1 capacity 10.67GB 2.9.1 hijkl + used 0B (0%) +data-worker-0 ACTIVE 2 capacity 10.67GB 2.9.1 hijkl + used 0B (0%) +data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde + used 0B (0%) + +# You can run I/O tests against the upgraded workers to validate they are serving, before moving to upgrade the next batch +$ bin/alluxio runTests --workers data-worker-0,data-worker-1 + +# Keep performing the steps above until all workers are upgraded +``` + +See more details about the `decommissionWorker` command in +[documentation]({{ '/en/operation/Admin-CLI.html' | relativize_url }}#decommissionworker). + +### Rolling restart/upgrade other components + +Other components like the Job Master, Job Worker and Proxy do not support rolling upgrade at the moment. +The admin can manually restart them in batches. 
+ ## Additional Options ### Alluxio worker ramdisk cache persistence diff --git a/docs/en/operation/Admin-CLI.md b/docs/en/operation/Admin-CLI.md index d58283d88f4b..c31cc0c64c6a 100644 --- a/docs/en/operation/Admin-CLI.md +++ b/docs/en/operation/Admin-CLI.md @@ -25,7 +25,8 @@ Usage: alluxio fsadmin [generic options] [statelock] [ufs [--mode ] ] [updateConf key1=val1 [key2=val2 ...]] - + [decommissionWorker [--addresses worker0,worker1] [--wait 5m] [--disable]] + [enableWorker [--addresses worker0,worker1]] ``` ## Operations @@ -271,6 +272,7 @@ Alluxio cluster summary: `report capacity` will report Alluxio cluster capacity information for different subsets of workers: * `-live` Live workers * `-lost` Lost workers +* `-decommissioned` Decommissioned workers * `-workers ` Specified workers, host names or ip addresses separated by `,`. ```shell @@ -282,6 +284,22 @@ $ ./bin/alluxio fsadmin report capacity -live $ ./bin/alluxio fsadmin report capacity -workers AlluxioWorker1,127.0.0.1 ``` +A typical output looks like below: +```shell +$ ./bin/alluxio fsadmin report capacity +Capacity information for all workers: + Total Capacity: 10.67GB + Tier: MEM Size: 10.67GB + Used Capacity: 0B + Tier: MEM Size: 0B + Used Percentage: 0% + Free Percentage: 100% + +Worker Name State Last Heartbeat Storage MEM Version Revision +data-worker-1 ACTIVE 166 capacity 10.67GB 2.9.0 ffcb706497bf47e43d5b2efc90f664c7a3e7014e + used 0B (0%) +``` + `report metrics` will report the metrics stored in the leading master which includes leading master process metrics and aggregated cluster metrics. 
@@ -301,9 +319,15 @@ hdfs://localhost:9000/ on / (hdfs, capacity=-1B, used=-1B, not read-only, not sh ```shell $ bin/alluxio fsadmin report jobservice -Worker: MigrationTest-workers-2 Task Pool Size: 10 Unfinished Tasks: 1303 Active Tasks: 10 Load Avg: 1.08, 0.64, 0.27 -Worker: MigrationTest-workers-3 Task Pool Size: 10 Unfinished Tasks: 1766 Active Tasks: 10 Load Avg: 1.02, 0.48, 0.21 -Worker: MigrationTest-workers-1 Task Pool Size: 10 Unfinished Tasks: 1808 Active Tasks: 10 Load Avg: 0.73, 0.5, 0.23 +Master Address State Start Time Version Revision +MigrationTest-master-1:20001 PRIMARY 20230425-110043 2.9.0 ac6a0616 +MigrationTest-master-2:20001 STANDBY 20230425-110044 2.9.0 ac6a0616 +MigrationTest-master-3:20001 STANDBY 20230425-110050 2.9.0 ac6a0616 + +Job Worker Version Revision Task Pool Size Unfinished Tasks Active Tasks Load Avg +MigrationTest-workers-2 2.9.0 ac6a0616 10 1303 10 1.08, 0.64, 0.27 +MigrationTest-workers-3 2.9.0 ac6a0616 10 1766 10 1.02, 0.48, 0.21 +MigrationTest-workers-1 2.9.0 ac6a0616 10 1808 10 0.73, 0.5, 0.23 Status: CREATED Count: 4877 Status: CANCELED Count: 0 @@ -322,6 +346,16 @@ Timestamp: 10-24-2019 17:15:22:946 Id: 1603922372008 Name: Persist 10 Longest Running Jobs: ``` +`report proxy` will report a summary of the proxy instances in the cluster. + +```shell +$ ./bin/alluxio fsadmin report proxy +1 Proxy instances in the cluster, 1 serving and 0 lost + +Address State Start Time Last Heartbeat Time Version Revision +Alluxio-Proxy-Node-1:39997 ACTIVE 20230421-165608 20230421-170201 2.9.0 c697105199e29a480cf6251494d367cf325123a0 +``` + ### statelock The `statelock` command provides information about the waiters and holders of the alluxio statelock. 
@@ -362,3 +396,108 @@ alluxio.master.audit.logging.enabled alluxio.master.ufs.managed.blocking.enabled alluxio.master.metastore.inode.inherit.owner.and.group ``` + +### decommissionWorker + +The `decommissionWorker` command can be used to take the target workers off-line from the cluster, +so Alluxio clients and proxy instances stop using those workers, and therefore they can be killed or restarted gracefully. +Note that this command will NOT kill worker processes. This command will NOT remove the cache on the workers. +This command can be typically used for the following use cases: +1. Perform a graceful rolling restart of all workers in the cluster, where no user requests should fail. +2. Scale down the cluster without interrupting user I/O workflow. + +```shell +$ ./bin/alluxio fsadmin decommissionWorker --addresses data-worker-0,data-worker-1 [--wait 5m] [--disable] +``` +The arguments are explained as follows: + +`--addresses/-a` is a required argument, followed by a list of comma-separated worker addresses. Each worker address is `:`. +Unlike many other commands which specify the RPC port, we use the web port here because the command will monitor the worker's workload +exposed at the web port. If the port is not specified, the value in `alluxio.worker.web.port` will be used. Note that `alluxio.worker.web.port` +will be resolved from the node where this command is run. + +`--wait/-w` is an optional argument. This argument defines how long this command waits for the workers to become idle. +This command returns either when all workers are idle, or when this wait time is up. The default value is `5m`. + +`--disable/-d` is an optional argument. If this is specified, the decommissioned workers will not be able to +register to the master. In other words, a disabled worker cannot join the cluster and will not be chosen for I/O requests. +This is useful when the admin wants to remove the workers from the cluster. 
When those disabled workers register, +the master will reject them but will not kill the worker processes. This is often used in tandem with the `enableWorker` command. + +The command will perform the following actions: + +1. For each worker in the batch, send a decommission command to the primary Alluxio master so + the master marks those workers as decommissioned and will not serve operations. The ongoing I/O on those workers + will NOT be interrupted. +2. It takes a small interval for all other Alluxio components (like clients and Proxy instances) + to know those workers should not be used, so this command waits for the interval time defined by `alluxio.user.worker.list.refresh.interval` + on the node where this command is executed. Before a client/proxy realizes the workers are decommissioned, + they may submit more I/O requests to those workers, and those requests should execute normally. +3. Get the active worker list from the master after waiting, and verify the target workers are not active anymore. +4. Wait for the workers to become idle. This command will constantly check the idleness status on each worker. + A worker is considered "idle" if it is not actively serving RPCs (including control RPCs and data I/O). +5. Once either all workers have become idle or the specified timeout expires, this command will return. + +A worker is considered "idle" if it is not actively serving RPCs (including control RPCs and data I/O). +The `decommissionWorker` command stops all clients from using those workers, and waits for all ongoing requests to complete. +The command waits for all those clients to stop using those workers, and waits for those workers to become idle, +so when this command returns success (exit code 0), it is safe for the admin to kill/restart those worker processes. 
+The primary Alluxio master maintains a list of available Alluxio workers, and all Alluxio components (including Proxy, Job Master/Job Worker and Client) +will regularly refresh this list with the primary master. The refresh interval is defined by `alluxio.user.worker.list.refresh.interval`. +So after those workers are taken off the available list, after another refresh interval has elapsed, +and after all ongoing requests have been served, those workers should not receive any more requests. +Therefore, no matter when the admin restarts/kills those worker processes, that should not fail any requests. +However, there are a few exceptions. See the next section for more details. + +See [Rolling Upgrade Workers]({{ '/en/administration/Upgrade.html#rolling-upgraderestart-workers' | relativize_url }}) for how this command is used. + +**Limitations** + +This has some limitations. In some cases, the `decommissionWorker` command may return code 0 (success) +but when the worker process is killed, some user I/O requests may fail. + +When the `decommissionWorker` command waits for a worker to become idle, it only respects ongoing I/O requests on the worker. +If the Alluxio client is reading/writing the worker with short circuit, the client directly reads/writes cache files +in worker storage and maintains a gRPC stream with the worker simply for locking that block. An open (and idle) stream +will NOT be respected by the `decommissionWorker` command so it may consider the worker to be idle. +However, when the admin kills the worker and deletes the cache storage, the client request will fail +either because the cache blocks are gone or because the gRPC stream is broken. So if you are using short circuit, +wait a few more minutes before killing the worker process and deleting cached blocks. The clients will +know the workers are decommissioned (should not be read/written) and stop using those workers by short circuit. 
+ +The `decommissionWorker` command does NOT consider cache blocks on the target workers. That means if +decommissioning some workers will bring ALL replicas of certain blocks offline, and those blocks only exist in cache, +then clients CAN NOT read those blocks. The user has to restart the workload after those workers are restarted. + +**Exit Codes** + +This command is idempotent and can be retried, but the admin is advised to manually check if there's an error. + +The return codes have different meanings: + +1. `0(OK)`: All workers are successfully decommissioned and now idle. Safe to kill or restart this batch of workers now. +2. `1(DECOMMISSION_FAILED)`: Failed to decommission all workers. The admin should double check the worker addresses and the primary master status. +3. `2(LOST_MASTER_CONNECTION)`: Lost connection to the primary master while this command is running. This suggests the configured master address is wrong or the primary master failed over. +4. `3(WORKERS_NOT_IDLE)`: Some workers were still not idle after the wait. Either the wait time is too short or those workers failed to mark decommissioned. The admin should manually intervene and check those workers. +5. `10(LOST_SOME_WORKERS)`: Workers are decommissioned but some or all workers lost contact while this command is running. If a worker is not serving then it is safe to kill or restart. But the admin is advised to double check the status of those workers. + +### enableWorker + +The `enableWorker` command is the reverse operation of `decommissionWorker -d`. The `decommissionWorker -d` command will +decommission workers and not disable them from re-registering to the cluster. The `enableWorker` command will +enable those workers to re-register to the cluster and serve again. 
+ +```shell +# Decommission 2 workers and disable them from joining the cluster again even if they restart +$ ./bin/alluxio fsadmin decommissionWorker --addresses data-worker-0,data-worker-1 --disable + +# data-worker-0 and data-worker-1 will not be able to register to the master after they restart +$ ./bin/alluxio-start.sh workers # This should show an error status + +# The admin regrets and wants to bring one of them back to the cluster +$ ./bin/alluxio fsadmin enableWorker --addresses data-worker-1 + +# If data-worker-1 is restarted, it is able to register to the cluster and serve normally again +$ ./bin/alluxio-start.sh workers +``` From abf9e6c66a805d38547d9e8e2911d8f28c4cc6d6 Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Mon, 8 May 2023 18:35:31 +0800 Subject: [PATCH 258/334] Avoid the NPE exception in copy object with x-amz-tagging-directive ### What changes are proposed in this pull request? Avoid the NPE exception in copy object with x-amz-tagging-directive ### Why are the changes needed? 
Avoid the NPE exception pr-link: Alluxio/alluxio#17400 change-id: cid-db0f6ac83b1f6de7937c17f82911b153fe63155d --- .../main/java/alluxio/proxy/s3/S3RestServiceHandler.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 7d7968eefdfa..465b0f074a4a 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -971,6 +971,14 @@ public Response createObjectOrUploadPart(@HeaderParam("Content-MD5") final Strin throw new S3Exception("Copying an object to itself invalid.", objectPath, S3ErrorCode.INVALID_REQUEST); } + // avoid the NPE of status + try { + if (status == null) { + status = userFs.getStatus(new AlluxioURI(copySource)); + } + } catch (Exception e) { + throw S3RestUtils.toObjectS3Exception(e, objectPath, auditContext); + } try (FileInStream in = userFs.openFile(new AlluxioURI(copySource)); RangeFileInStream ris = RangeFileInStream.Factory.create(in, status.getLength(), s3Range); From 71bd2029b53c3dbd0a1c9055d0f68bfcfee4de75 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Tue, 9 May 2023 10:05:12 +0800 Subject: [PATCH 259/334] Implement metadata sync V2 ### What changes are proposed in this pull request? In this PR, we implemented a new metadata sync architecture. Screen Shot 2023-05-04 at 2 43 22 PM In the new architecture, metadata sync requests are processed as tasks. Tasks are executed asynchronously managed by the task tracker. 
There are 4 major components in the system: Task Tracker - Creates metadata tasks, controls resources and dedups concurrent sync on the same path The Path Loader - Manages the path loading process and breakdown the loading task into batches The UFS Loader - Loads a single batch of UFS status for a path (up to 1000 files) The Update Metadata - Compares the delta between UFS status and inodes in alluxio and updates the alluxio file metadata In addition to the internal change, we also revamped the load metadata cli command backed by the metadata sync v2. The feature is still experimental and can only be triggered by the loadMetadata cli command. S3 is the only supported UFS. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. Better resource control. We redesigned rate limitation mechanism and fetches file from UFS by batch (backed by S3 listObjectsV2 endpoint) to reduce the memory consumption. 2. Better performance. We introduced aws client sdk v2 in this PR where an s3 async client is provided with better performance. Also we redesigned the concurrency to make it more efficient as well as other minor optimizations. Result: We tested the new code path to load a s3 dataset of 5 million files. We observed 50% to 100% performance improvement and less memory consumption. We used to see OOM on a 8gb alluxio master but now the issue is gone. ### Does this PR introduce any user facing changes? Yes. The loadMetadata v2 command is revamped. 
pr-link: Alluxio/alluxio#17242 change-id: cid-8163ac604eaa6f9d93a7099fd9007ee7594fcecd --- .../alluxio/client/file/BaseFileSystem.java | 41 + .../client/file/DelegatingFileSystem.java | 29 + .../java/alluxio/client/file/FileSystem.java | 43 + .../client/file/FileSystemMasterClient.java | 39 + .../RetryHandlingFileSystemMasterClient.java | 55 + .../client/file/ufs/UfsBaseFileSystem.java | 30 + .../alluxio/util/FileSystemOptionsUtils.java | 9 + .../file/MockFileSystemMasterClient.java | 28 + .../cache/LocalCacheFileInStreamTest.java | 30 + core/common/pom.xml | 2 +- .../src/main/java/alluxio/AlluxioURI.java | 14 +- .../ManagedBlockingUfsForwarder.java | 31 + .../main/java/alluxio/conf/PropertyKey.java | 43 + .../main/java/alluxio/conf/path/TrieNode.java | 21 + .../file/options/DirectoryLoadType.java | 47 + .../src/main/java/alluxio/grpc/GrpcUtils.java | 20 + .../main/java/alluxio/metrics/MetricKey.java | 103 ++ .../alluxio/underfs/BaseUnderFileSystem.java | 128 +- .../java/alluxio/underfs/Fingerprint.java | 13 +- .../underfs/ObjectUnderFileSystem.java | 292 +++- .../main/java/alluxio/underfs/UfsClient.java | 52 + .../java/alluxio/underfs/UfsLoadResult.java | 110 ++ .../java/alluxio/underfs/UnderFileSystem.java | 17 +- .../underfs/UnderFileSystemWithLogging.java | 74 + .../main/java/alluxio/util/IteratorUtils.java | 32 + .../main/java/alluxio/util/RateLimiter.java | 60 + .../java/alluxio/util/SimpleRateLimiter.java | 65 + .../main/java/alluxio/util/io/PathUtils.java | 34 +- .../java/alluxio/conf/path/TrieNodeTest.java | 58 +- .../java/alluxio/underfs/FingerprintTest.java | 10 + .../underfs/ObjectUnderFileSystemTest.java | 48 + .../underfs/UnderFileSystemTestUtil.java | 49 + .../java/alluxio/util/RateLimiterTest.java | 88 ++ .../FileSystemMergeJournalContext.java | 2 +- .../MetadataSyncMergeJournalContext.java | 13 + core/server/master/pom.xml | 14 +- .../alluxio/master/block/BlockMaster.java | 10 + .../master/block/DefaultBlockMaster.java | 8 +- 
.../master/file/DefaultFileSystemMaster.java | 231 ++- .../alluxio/master/file/FileSystemMaster.java | 39 + .../FileSystemMasterClientServiceHandler.java | 50 + .../alluxio/master/file/InodeSyncStream.java | 6 +- .../java/alluxio/master/file/RpcContext.java | 2 +- .../file/contexts/CreateFileContext.java | 91 +- .../file/contexts/CreatePathContext.java | 60 +- .../master/file/contexts/DeleteContext.java | 17 + .../file/contexts/ListStatusContext.java | 25 + .../file/contexts/SyncMetadataContext.java | 73 + .../alluxio/master/file/mdsync/BaseTask.java | 314 ++++ .../master/file/mdsync/BaseTaskResult.java | 35 + .../master/file/mdsync/BatchPathWaiter.java | 103 ++ .../file/mdsync/DefaultSyncProcess.java | 973 ++++++++++++ .../file/mdsync/DirectoryPathWaiter.java | 71 + .../master/file/mdsync/LoadRequest.java | 157 ++ .../file/mdsync/LoadRequestExecutor.java | 271 ++++ .../master/file/mdsync/LoadResult.java | 125 ++ .../file/mdsync/LoadResultExecutor.java | 67 + .../file/mdsync/MetadataSyncHandler.java | 100 ++ .../master/file/mdsync/PathLoaderTask.java | 259 ++++ .../master/file/mdsync/PathSequence.java | 59 + .../master/file/mdsync/PathWaiter.java | 30 + .../file/mdsync/RateLimitedRequest.java | 60 + .../master/file/mdsync/SyncFailReason.java | 28 + .../master/file/mdsync/SyncOperation.java | 80 + .../file/mdsync/SyncOperationMetrics.java | 40 + .../master/file/mdsync/SyncProcess.java | 29 + .../file/mdsync/SyncProcessContext.java | 273 ++++ .../master/file/mdsync/SyncProcessResult.java | 84 + .../alluxio/master/file/mdsync/TaskGroup.java | 113 ++ .../alluxio/master/file/mdsync/TaskInfo.java | 149 ++ .../alluxio/master/file/mdsync/TaskStats.java | 279 ++++ .../master/file/mdsync/TaskTracker.java | 329 ++++ .../file/meta/InodeIterationResult.java | 50 + .../alluxio/master/file/meta/InodeTree.java | 54 +- .../master/file/meta/LockedInodePath.java | 48 +- .../alluxio/master/file/meta/MountTable.java | 13 +- .../file/meta/MutableInodeDirectory.java | 6 +- 
.../master/file/meta/MutableInodeFile.java | 13 +- .../master/file/meta/UfsSyncUtils.java | 3 + .../master/metastore/ReadOnlyInodeStore.java | 125 ++ .../metastore/RecursiveInodeIterator.java | 233 +++ .../metastore/SkippableInodeIterator.java | 30 + .../file/FileSystemMasterS3UfsTest.java | 60 +- .../master/file/FileSystemMasterTestBase.java | 16 +- ...FileSystemMetadataSyncV2BenchmarkTest.java | 131 ++ .../file/FileSystemMetadataSyncV2Test.java | 1375 +++++++++++++++++ .../master/file/MetadataSyncDepthV2Test.java | 204 +++ .../file/MetadataSyncMultiMountV2Test.java | 199 +++ .../MetadataSyncNonObjectStoreV2Test.java | 163 ++ .../master/file/MetadataSyncV2TestBase.java | 308 ++++ .../master/file/mdsync/BaseTaskTest.java | 128 ++ .../file/mdsync/BatchPathWaiterTest.java | 334 ++++ .../file/mdsync/DirectoryPathWaiterTest.java | 196 +++ .../master/file/mdsync/DummySyncProcess.java | 63 + .../master/file/mdsync/MockUfsClient.java | 108 ++ .../master/file/mdsync/TaskTrackerTest.java | 673 ++++++++ .../master/file/mdsync/TestSyncProcessor.java | 101 ++ .../master/file/mdsync/UfsLoadsTest.java | 88 ++ .../master/metastore/InodeStoreTest.java | 120 +- .../master/metastore/InodeStoreTestBase.java | 151 ++ .../metastore/RecursiveInodeIteratorTest.java | 415 +++++ .../main/proto/grpc/file_system_master.proto | 76 +- .../src/main/proto/grpc/fscommon.proto | 6 + core/transport/src/main/proto/proto.lock | 235 +++ docs/en/operation/User-CLI.md | 36 + .../fuse/auth/AbstractAuthPolicyTest.java | 30 + .../cli/MockFuseFileSystemMasterClient.java | 28 + pom.xml | 23 +- .../cli/fs/command/LoadMetadataCommand.java | 238 ++- tests/pom.xml | 22 +- .../fs/FileSystemS3UfsIntegrationTest.java | 42 +- .../delegating/DelegatingUnderFileSystem.java | 26 + underfs/local/pom.xml | 8 + .../local/LocalUnderFileSystemTest.java | 33 + underfs/pom.xml | 4 +- underfs/s3a/pom.xml | 13 + .../underfs/s3a/S3AUnderFileSystem.java | 362 ++++- .../s3a/S3AUnderFileSystemMockServerTest.java | 138 +- 
.../underfs/s3a/S3AUnderFileSystemTest.java | 16 +- 119 files changed, 12723 insertions(+), 405 deletions(-) create mode 100644 core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java create mode 100644 core/common/src/main/java/alluxio/underfs/UfsClient.java create mode 100644 core/common/src/main/java/alluxio/underfs/UfsLoadResult.java create mode 100644 core/common/src/main/java/alluxio/util/IteratorUtils.java create mode 100644 core/common/src/main/java/alluxio/util/RateLimiter.java create mode 100644 core/common/src/main/java/alluxio/util/SimpleRateLimiter.java create mode 100644 core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java create mode 100644 core/common/src/test/java/alluxio/util/RateLimiterTest.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/PathLoaderTask.java create mode 100644 
core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncFailReason.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperation.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessContext.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java create mode 100644 core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java create mode 100644 core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java create mode 100644 core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java 
create mode 100644 core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java create mode 100644 core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java create mode 100644 core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java create mode 100644 core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index dcca12711155..e72c027b00f8 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -38,6 +38,7 @@ import alluxio.exception.status.UnauthenticatedException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.Bits; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -45,6 +46,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import 
alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -56,6 +58,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -549,6 +554,42 @@ public String getJobProgress(JobDescription jobDescription, } } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().syncMetadata(path, options); + } + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().syncMetadataAsync(path, options); + } + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().getSyncProgress(taskGroupId); + } + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + try (CloseableResource client = + mFsContext.acquireMasterClientResource()) { + return client.get().cancelSyncMetadata(taskGroupId); + } + } + /** * Checks an {@link AlluxioURI} for scheme and authority information. Warn the user and throw an * exception if necessary. 
diff --git a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java index 19f77a4d0849..e8d63da69e82 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/DelegatingFileSystem.java @@ -20,6 +20,7 @@ import alluxio.exception.FileIncompleteException; import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -27,6 +28,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -37,6 +39,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -261,6 +266,30 @@ public String getJobProgress(JobDescription jobDescription, return mDelegatedFileSystem.getJobProgress(jobDescription, format, verbose); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return mDelegatedFileSystem.syncMetadata(path, options); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return 
mDelegatedFileSystem.syncMetadataAsync(path, options); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + return mDelegatedFileSystem.getSyncProgress(taskGroupId); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + return mDelegatedFileSystem.cancelSyncMetadata(taskGroupId); + } + @Override public void close() throws IOException { mDelegatedFileSystem.close(); diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java index dccc68cde6d2..61778f425346 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystem.java @@ -30,6 +30,7 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -37,6 +38,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -49,6 +51,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -769,4 +774,42 @@ default void unmount(AlluxioURI path) throws IOException, AlluxioException { 
*/ String getJobProgress(JobDescription jobDescription, JobProgressReportFormat format, boolean verbose); + + /** + * Syncs metadata for a given alluxio path. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Syncs metadata asynchronously for a given alluxio path. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata async response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Gets the sync progress. + * + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException; + + /** + * Cancels an ongoing metadata sync. 
+ * + * @param taskGroupId the task group id + * @return the cancellation result + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java index 02943a854c82..43e81540be9f 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemMasterClient.java @@ -16,6 +16,7 @@ import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.AlreadyExistsException; import alluxio.exception.status.NotFoundException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckConsistencyPOptions; import alluxio.grpc.CompleteFilePOptions; @@ -25,6 +26,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -34,6 +36,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UpdateUfsModePOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -373,4 +378,38 @@ void updateUfsMode(AlluxioURI ufsUri, UpdateUfsModePOptions options) */ String getJobProgress(JobDescription jobDescription, JobProgressReportFormat format, boolean verbose); + + /** + * Syncs metadata for a given alluxio path. 
+ * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException; + + /** + * Syncs metadata for a given alluxio path asynchronously. + * + * @param path the path to sync metadata on + * @param options options to associate with this operation + * @return the sync metadata response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException; + + /** + * Gets the sync progress. + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException; + + /** + * Cancels an ongoing metadata sync. + * @param taskGroupId the task group id + * @return the cancellation result + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws AlluxioStatusException; } diff --git a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java index 0673cde84952..42d714f76fe7 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java +++ b/core/client/fs/src/main/java/alluxio/client/file/RetryHandlingFileSystemMasterClient.java @@ -15,6 +15,8 @@ import alluxio.AlluxioURI; import alluxio.Constants; import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.CancelSyncMetadataPRequest; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckAccessPRequest; import alluxio.grpc.CheckConsistencyPOptions; @@ -43,6 +45,8 @@ import alluxio.grpc.GetStatusPOptions; import alluxio.grpc.GetStatusPRequest; import alluxio.grpc.GetSyncPathListPRequest; +import 
alluxio.grpc.GetSyncProgressPRequest; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcUtils; import alluxio.grpc.JobProgressPOptions; import alluxio.grpc.JobProgressReportFormat; @@ -70,6 +74,10 @@ import alluxio.grpc.StopSyncPRequest; import alluxio.grpc.SubmitJobPRequest; import alluxio.grpc.SubmitJobPResponse; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPRequest; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.grpc.UnmountPRequest; import alluxio.grpc.UpdateMountPRequest; @@ -476,6 +484,53 @@ public String getJobProgress(JobDescription jobDescription, return response.getProgressReport(); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return retryRPC(() -> { + SyncMetadataPRequest request = SyncMetadataPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options) + .build(); + SyncMetadataPResponse response = mClient.syncMetadata(request); + return response; + }, RPC_LOG, "SyncMetadata", "path=%s,options=%s", path, options); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return retryRPC(() -> { + SyncMetadataPRequest request = SyncMetadataPRequest.newBuilder() + .setPath(path.getPath()) + .setOptions(options) + .build(); + SyncMetadataAsyncPResponse response = mClient.syncMetadataAsync(request); + return response; + }, RPC_LOG, "SyncMetadataAsync", "path=%s,options=%s", path, options); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException { + return retryRPC(() -> { + GetSyncProgressPRequest request = GetSyncProgressPRequest.newBuilder() + .setTaskGroupId(taskGroupId) + .build(); + return mClient.getSyncProgress(request); + }, RPC_LOG, 
"GetSyncProgress", "taskGroupId=%s", taskGroupId); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws AlluxioStatusException { + return retryRPC(() -> { + CancelSyncMetadataPRequest request = CancelSyncMetadataPRequest.newBuilder() + .setTaskGroupId(taskGroupId) + .build(); + return mClient.cancelSyncMetadata(request); + }, RPC_LOG, "CancelSyncMetadata", "taskGroupId=%s", taskGroupId); + } + /** * Gets the path that will be transported to master. * diff --git a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java index 77c2f4c6d786..2341cb93c46b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/ufs/UfsBaseFileSystem.java @@ -21,7 +21,9 @@ import alluxio.client.file.options.UfsFileSystemOptions; import alluxio.conf.AlluxioConfiguration; import alluxio.exception.AlluxioException; +import alluxio.exception.FileDoesNotExistException; import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -30,6 +32,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -40,6 +43,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ 
-416,6 +422,30 @@ public String getJobProgress(JobDescription jobDescription, throw new UnsupportedOperationException(); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + /** * Transform UFS file/directory status to client-side status. 
* diff --git a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java index 56520f8bfb77..c3cc86de9b28 100644 --- a/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java +++ b/core/client/fs/src/main/java/alluxio/util/FileSystemOptionsUtils.java @@ -35,6 +35,7 @@ import alluxio.grpc.ScheduleAsyncPersistencePOptions; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataPOptions; import alluxio.grpc.TtlAction; import alluxio.grpc.UnmountPOptions; import alluxio.security.authorization.Mode; @@ -169,6 +170,14 @@ public static ExistsPOptions existsDefaults(AlluxioConfiguration conf) { .build(); } + /** + * @param conf Alluxio configuration + * @return options based on the configuration + */ + public static SyncMetadataPOptions syncMetadataDefaults(AlluxioConfiguration conf) { + return SyncMetadataPOptions.newBuilder().build(); + } + /** * @param conf Alluxio configuration * @return options based on the configuration diff --git a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java index 2b5d44680fc6..67642b512829 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java +++ b/core/client/fs/src/test/java/alluxio/client/file/MockFileSystemMasterClient.java @@ -14,6 +14,7 @@ import alluxio.AlluxioURI; import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CheckConsistencyPOptions; import alluxio.grpc.CompleteFilePOptions; @@ -23,6 +24,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import 
alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -32,6 +34,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UpdateUfsModePOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -254,4 +259,27 @@ public String getJobProgress(JobDescription jobDescription, JobProgressReportFormat format, boolean verbose) { return null; } + + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException { + return null; + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws AlluxioStatusException { + return null; + } } diff --git a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java index 630faf1d4e0e..ab17f2a98a7d 100644 --- a/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java +++ b/core/client/fs/src/test/java/alluxio/client/file/cache/LocalCacheFileInStreamTest.java @@ -32,6 +32,7 @@ import alluxio.exception.FileIncompleteException; import alluxio.exception.InvalidPathException; import alluxio.exception.OpenDirectoryException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import 
alluxio.grpc.CreateFilePOptions; @@ -39,6 +40,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -49,6 +51,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -914,6 +919,31 @@ public String getJobProgress(JobDescription jobDescription, throw new UnsupportedOperationException(); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, + SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + return null; + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + return null; + } + @Override public void close() throws IOException { throw new UnsupportedOperationException(); diff --git a/core/common/pom.xml b/core/common/pom.xml index 24ef667caa2e..6e3656543c70 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -131,7 +131,7 @@ io.netty netty-tcnative-boringssl-static - 2.0.26.Final + 2.0.34.Final diff --git a/core/common/src/main/java/alluxio/AlluxioURI.java b/core/common/src/main/java/alluxio/AlluxioURI.java index 
055222caa8f4..11e8e8c5ffb5 100644 --- a/core/common/src/main/java/alluxio/AlluxioURI.java +++ b/core/common/src/main/java/alluxio/AlluxioURI.java @@ -449,6 +449,18 @@ public static String normalizePath(String path) { * @return true the current alluxioURI is an ancestor of the AlluxioURI */ public boolean isAncestorOf(AlluxioURI alluxioURI) throws InvalidPathException { + return isAncestorOf(alluxioURI, true); + } + + /** + * Returns true if the current AlluxioURI is an ancestor of another AlluxioURI. + * otherwise, return false. + * @param alluxioURI potential children to check + * @param cleanPath if the paths should be cleaned + * @return true the current alluxioURI is an ancestor of the AlluxioURI + */ + public boolean isAncestorOf(AlluxioURI alluxioURI, boolean cleanPath) + throws InvalidPathException { // To be an ancestor of another URI, authority and scheme must match if (!Objects.equals(getAuthority(), alluxioURI.getAuthority())) { return false; @@ -458,7 +470,7 @@ public boolean isAncestorOf(AlluxioURI alluxioURI) throws InvalidPathException { } return PathUtils.hasPrefix(PathUtils.normalizePath(alluxioURI.getPath(), SEPARATOR), - PathUtils.normalizePath(getPath(), SEPARATOR)); + PathUtils.normalizePath(getPath(), SEPARATOR), cleanPath); } /** diff --git a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java index f9cd4cdd4111..78ff4f662fb2 100755 --- a/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java +++ b/core/common/src/main/java/alluxio/concurrent/ManagedBlockingUfsForwarder.java @@ -16,12 +16,14 @@ import alluxio.collections.Pair; import alluxio.concurrent.jsr.ForkJoinPool; import alluxio.conf.AlluxioConfiguration; +import alluxio.file.options.DescendantType; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclEntry; import 
alluxio.security.authorization.DefaultAccessControlList; import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsLoadResult; import alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; @@ -32,12 +34,15 @@ import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import javax.annotation.Nullable; /** @@ -578,6 +583,32 @@ public void close() throws IOException { mUfs.close(); } + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + return new ManagedBlockingUfsMethod>() { + @Override + public Iterator execute() throws IOException { + return mUfs.listStatusIterable(path, options, startAfter, batchSize); + } + }.get(); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + // given this is an async function, we do not execute it in the thread pool + mUfs.performListingAsync(path, continuationToken, startAfter, descendantType, + checkStatus, onComplete, onError); + } + + @Override + public RateLimiter getRateLimiter() { + return mUfs.getRateLimiter(); + } + /** * Utility class used to isolate calls into underlying UFS from concurrency compensation logic. * Note: This class used to make calls with a return value. 
diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 9ce9ac5ee79b..fb392f860a06 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -3753,6 +3753,33 @@ public String toString() { .setIsHidden(true) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS = + booleanBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS) + .setDefaultValue(true) + .setDescription("Allows metadata sync operations on single items (i.e. getStatus) " + "to run concurrently with metadata sync operations on directories " + "(i.e. listings) on intersecting paths.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING = + booleanBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING) + .setDefaultValue(true) + .setDescription("Allows non-recursive metadata sync operations on directories " + "to run concurrently with recursive metadata sync operations on " + "intersecting paths.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS = + intBuilder(Name.MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS) + .setDefaultValue(100) + .setDescription("The number of concurrently running UFS listing operations " + "during metadata sync. This includes loads that have completed, but " + "have not yet been processed.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); // In Java8 in container environment Runtime.availableProcessors() always returns 1, // which is not the actual number of cpus, so we set a safe default value 32.
public static final PropertyKey MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = @@ -3792,6 +3819,14 @@ public String toString() { .setScope(Scope.MASTER) .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .build(); + public static final PropertyKey MASTER_METADATA_SYNC_UFS_RATE_LIMIT = + longBuilder(Name.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) + .setDescription("The maximum number of operations per second to execute " + + "on an individual UFS during metadata sync operations. If 0 or unset " + + "then no rate limit is enforced.") + .setScope(Scope.MASTER) + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .build(); public static final PropertyKey MASTER_METADATA_SYNC_IGNORE_TTL = booleanBuilder(Name.MASTER_METADATA_SYNC_IGNORE_TTL) .setDefaultValue(false) @@ -8141,6 +8176,12 @@ public static final class Name { "alluxio.master.metadata.sync.report.failure"; public static final String MASTER_METADATA_SYNC_GET_DIRECTORY_STATUS_SKIP_LOADING_CHILDREN = "alluxio.master.metadata.sync.get.directory.status.skip.loading.children"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS = + "alluxio.master.metadata.sync.ufs.concurrent.loads"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS = + "alluxio.master.metadata.sync.ufs.concurrent.get.status"; + public static final String MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING = + "alluxio.master.metadata.sync.ufs.concurrent.listing"; public static final String MASTER_METADATA_SYNC_UFS_PREFETCH_POOL_SIZE = "alluxio.master.metadata.sync.ufs.prefetch.pool.size"; public static final String MASTER_METADATA_SYNC_TRAVERSAL_ORDER = @@ -8149,6 +8190,8 @@ public static final class Name { "alluxio.master.metadata.sync.ufs.prefetch.status"; public static final String MASTER_METADATA_SYNC_UFS_PREFETCH_TIMEOUT = "alluxio.master.metadata.sync.ufs.prefetch.timeout"; + public static final String MASTER_METADATA_SYNC_UFS_RATE_LIMIT = + "alluxio.master.metadata.sync.ufs.rate.limit"; public static 
final String MASTER_METADATA_SYNC_IGNORE_TTL = "alluxio.master.metadata.sync.ignore.ttl"; public static final String MASTER_METASTORE = "alluxio.master.metastore"; diff --git a/core/common/src/main/java/alluxio/conf/path/TrieNode.java b/core/common/src/main/java/alluxio/conf/path/TrieNode.java index e741478a9859..c2cc0f67179f 100644 --- a/core/common/src/main/java/alluxio/conf/path/TrieNode.java +++ b/core/common/src/main/java/alluxio/conf/path/TrieNode.java @@ -71,6 +71,27 @@ public TrieNode insert(String path) { return current; } + /** + * Get the terminal node closest to the full path. + * @param path the path to check + * @return the terminal node + */ + public Optional> getClosestTerminal(String path) { + TrieNode current = this; + TrieNode result = current.isTerminal() ? current : null; + for (String nxt : path.split("/")) { + if (current.mChildren.containsKey(nxt)) { + current = current.mChildren.get(nxt); + if (current.mIsTerminal) { + result = current; + } + } else { + break; + } + } + return Optional.ofNullable(result); + } + /** * Traverses the trie along the path components until the traversal cannot proceed any more. * diff --git a/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java b/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java new file mode 100644 index 000000000000..69e3e77eba22 --- /dev/null +++ b/core/common/src/main/java/alluxio/file/options/DirectoryLoadType.java @@ -0,0 +1,47 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.file.options; + +/** + * Defines how directories will be listed on the UFS when performing + * metadata synchronization. Only effects recursive listings. + */ +public enum DirectoryLoadType { + /** + * Load the path recursively by running a single command which returns results + * in batches if supported by the UFS. For example on an object store, this + * will perform a ListBucket operation with no delimiter. This will create + * less load on the UFS than {@link DirectoryLoadType#BFS} and {@link DirectoryLoadType#DFS} + * but will be more impacted by latency between Alluxio and the UFS as there + * is only a single listing running. + * This should only be used with S3 UFS types as currently only this UFS + * type uses batch listing, otherwise all items will be loaded into memory + * before processing. + */ + SINGLE_LISTING, + /** + * Load the path recursively by loading each nested directory in a separate + * load command in a breadth first manner. Each directory will be listed in batches + * if supported by the UFS. Listings of different directories will run concurrently. + * Note that this is only an approximate BFS, as batches are processed and loaded + * concurrently and may be loaded in different orders. + */ + BFS, + /** + * Load the path recursively by loading each nested directory in a separate + * load command in a depth first manner. Each directory will be listed in batches + * if supported by the UFS. Listings of different directories will run concurrently. + * Note that this is only an approximate DFS, as batches are processed and loaded + * concurrently and may be loaded in different orders. 
+ */ + DFS +} diff --git a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java index 10a51f2a0091..281411186c69 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java @@ -15,6 +15,7 @@ import alluxio.Constants; import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; import alluxio.proto.journal.File; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclAction; @@ -227,6 +228,25 @@ public static DescendantType fromProto(alluxio.grpc.LoadDescendantPType pDescend } } + /** + * Converts a proto type to a wire type. + * + * @param pDirectoryLoadType the proto representation of a directory load type + * @return the wire representation of the directory load type + */ + public static DirectoryLoadType fromProto(alluxio.grpc.DirectoryLoadPType pDirectoryLoadType) { + switch (pDirectoryLoadType) { + case SINGLE_LISTING: + return DirectoryLoadType.SINGLE_LISTING; + case BFS: + return DirectoryLoadType.BFS; + case DFS: + return DirectoryLoadType.DFS; + default: + throw new IllegalStateException("Unknown DirectoryLoadType: " + pDirectoryLoadType); + } + } + /** * Converts a proto type to a wire type. 
* diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index a56d95df27b3..dcdd08877873 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -715,6 +715,109 @@ public static String getSyncMetricName(long mountId) { .setMetricType(MetricType.TIMER) .build(); + // Metadata sync v2 metrics + public static final MetricKey MASTER_METADATA_SYNC_QUEUED_LOADS = + new Builder("Master.MetadataSyncV2QueuedLoads") + .setDescription("Total number of load requests that are pending") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_RUNNING_LOADS = + new Builder("Master.MetadataSyncV2RunningLoads") + .setDescription("The number of load requests that are in progress or" + + " have completed, but not yet been processed") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_RUNNING_TASKS = + new Builder("Master.MetadataSyncV2RunningTasks") + .setDescription("The number of metadata sync tasks currently running") + .setMetricType(MetricType.GAUGE) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_COMPLETED_TASKS = + new Builder("Master.MetadataSyncV2CompletedTasks") + .setDescription("The number of completed metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FAILED_TASKS = + new Builder("Master.MetadataSyncV2FailedTasks") + .setDescription("The number of failed metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_CANCELLED_TASKS = + new Builder("Master.MetadataSyncV2CancelledTasks") + .setDescription("The number of 
cancelled metadata sync tasks") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_LOADS_FAILED = + new Builder("Master.MetadataSyncV2LoadsFailed") + .setDescription("The number of failed load requests during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_PROCESSING_FAILED = + new Builder("Master.MetadataSyncV2ProcessingFailed") + .setDescription("The number of loads failed during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_CREATED = + new Builder("Master.MetadataSyncV2FilesCreated") + .setDescription("The number of files created during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_DELETED = + new Builder("Master.MetadataSyncV2FilesDeleted") + .setDescription("The number of files deleted during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_RECREATED = + new Builder("Master.MetadataSyncV2FilesRecreated") + .setDescription("The number of files recreated during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_UPDATED = + new Builder("Master.MetadataSyncV2FilesUpdated") + .setDescription("The number of files updated during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_CONCURRENT_UPDATE = + new
Builder("Master.MetadataSyncV2FilesSkippedConcurrentUpdate") + .setDescription("The number of files skipped due to concurrent update " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_MOUNT_POINT = + new Builder("Master.MetadataSyncV2FilesSkippedMountPoint") + .setDescription("The number of files skipped because the inode is a mount point " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_NOOP = + new Builder("Master.MetadataSyncV2FilesNoop") + .setDescription("The number of files at parity between alluxio and UFS " + + "during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + public static final MetricKey MASTER_METADATA_SYNC_FILES_SKIPPED_NON_PERSISTED = + new Builder("Master.MetadataSyncV2FilesSkippedNonPersisted") + .setDescription("The number of files skipped because the " + + "inode is not persisted during processing during metadata sync") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); + // Metadata sync metrics public static final MetricKey MASTER_METADATA_SYNC_UFS_MOUNT = new Builder("Master.MetadataSyncUfsMount.") diff --git a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java index c43299e7ab89..a33135db1a07 100644 --- a/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/BaseUnderFileSystem.java @@ -16,6 +16,8 @@ import alluxio.SyncInfo; import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; import 
alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.DefaultAccessControlList; @@ -24,21 +26,32 @@ import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; +import alluxio.util.ThreadFactoryUtils; import alluxio.util.io.PathUtils; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterators; +import com.google.common.io.Closer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Queue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; +import java.util.stream.Stream; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -46,7 +59,7 @@ * A base abstract {@link UnderFileSystem}. */ @ThreadSafe -public abstract class BaseUnderFileSystem implements UnderFileSystem { +public abstract class BaseUnderFileSystem implements UnderFileSystem, UfsClient { private static final Logger LOG = LoggerFactory.getLogger(BaseUnderFileSystem.class); public static final Pair EMPTY_ACL = new Pair<>(null, null); @@ -57,6 +70,10 @@ public abstract class BaseUnderFileSystem implements UnderFileSystem { /** UFS Configuration options. */ protected final UnderFileSystemConfiguration mUfsConf; + private final ExecutorService mAsyncIOExecutor; + + private final RateLimiter mRateLimiter; + /** * Constructs an {@link BaseUnderFileSystem}. 
* @@ -66,6 +83,27 @@ public abstract class BaseUnderFileSystem implements UnderFileSystem { protected BaseUnderFileSystem(AlluxioURI uri, UnderFileSystemConfiguration ufsConf) { mUri = Preconditions.checkNotNull(uri, "uri"); mUfsConf = Preconditions.checkNotNull(ufsConf, "ufsConf"); + mAsyncIOExecutor = Executors.newCachedThreadPool( + ThreadFactoryUtils.build(uri.getPath() + "IOThread", true)); + long rateLimit = mUfsConf.isSet(PropertyKey.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) + ? mUfsConf.getLong(PropertyKey.MASTER_METADATA_SYNC_UFS_RATE_LIMIT) : 0; + mRateLimiter = RateLimiter.createRateLimiter(rateLimit); + } + + @Override + public void close() throws IOException { + try (Closer closer = Closer.create()) { + closer.register(() -> { + if (mAsyncIOExecutor != null) { + mAsyncIOExecutor.shutdown(); + } + }); + } + } + + @Override + public RateLimiter getRateLimiter() { + return mRateLimiter; } @Override @@ -163,6 +201,94 @@ public boolean isSeekable() { return false; } + @Nullable + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + // Calling this method on non s3 UFS might result in OOM because batch based fetching + // is not supported and this method essentially fetches all ufs status and converts it to + // an iterator. 
+ UfsStatus[] result = listStatus(path, options); + if (result == null) { + return null; + } + Arrays.sort(result, Comparator.comparing(UfsStatus::getName)); + return Iterators.forArray(result); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + mAsyncIOExecutor.submit(() -> { + try { + UfsStatus baseStatus = null; + if (checkStatus) { + try { + baseStatus = getStatus(path); + if (baseStatus == null && !isObjectStorage()) { + onComplete.accept(new UfsLoadResult(Stream.empty(), 0, + null, null, false, false, false)); + return; + } + if (baseStatus != null && (descendantType == DescendantType.NONE + || baseStatus.isFile())) { + onComplete.accept(new UfsLoadResult(Stream.of(baseStatus), 1, + null, new AlluxioURI(baseStatus.getName()), false, + baseStatus.isFile(), isObjectStorage())); + return; + } + } catch (FileNotFoundException e) { + // if we are not using object storage we know nothing exists at the path, + // so just return an empty result + if (!isObjectStorage()) { + onComplete.accept(new UfsLoadResult(Stream.empty(), 0, + null, null, false, false, false)); + return; + } + } + } + UfsStatus[] items = listStatus(path, ListOptions.defaults() + .setRecursive(descendantType == DescendantType.ALL)); + if (items != null) { + if (descendantType == DescendantType.NONE && items.length > 0) { + assert isObjectStorage() && this instanceof ObjectUnderFileSystem; + ObjectUnderFileSystem.ObjectPermissions permissions = + ((ObjectUnderFileSystem) this).getPermissions(); + items = new UfsStatus[] { + new UfsDirectoryStatus("", permissions.getOwner(), permissions.getGroup(), + permissions.getMode())}; + } + Arrays.sort(items, Comparator.comparing(UfsStatus::getName)); + for (UfsStatus item: items) { + // performListingAsync is used by metadata sync v2 + // which expects the name of an item to be a 
full path + item.setName(PathUtils.concatPath(path, item.getName())); + } + } + if (items != null && items.length == 0) { + items = null; + } + UfsStatus firstItem = baseStatus != null ? baseStatus + : items != null ? items[0] : null; + UfsStatus lastItem = items == null ? firstItem + : items[items.length - 1]; + Stream itemStream = items == null ? Stream.empty() : Arrays.stream(items); + int itemCount = items == null ? 0 : items.length; + if (baseStatus != null) { + itemStream = Stream.concat(Stream.of(baseStatus), itemStream); + itemCount++; + } + onComplete.accept(new UfsLoadResult(itemStream, itemCount, + null, lastItem == null ? null : new AlluxioURI(lastItem.getName()), false, + firstItem != null && firstItem.isFile(), isObjectStorage())); + } catch (Throwable t) { + onError.accept(t); + } + }); + } + @Override @Nullable public UfsStatus[] listStatus(String path, ListOptions options) throws IOException { diff --git a/core/common/src/main/java/alluxio/underfs/Fingerprint.java b/core/common/src/main/java/alluxio/underfs/Fingerprint.java index d44a8b5829a5..2d41d02432f6 100644 --- a/core/common/src/main/java/alluxio/underfs/Fingerprint.java +++ b/core/common/src/main/java/alluxio/underfs/Fingerprint.java @@ -14,12 +14,13 @@ import alluxio.Constants; import alluxio.security.authorization.AccessControlList; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Splitter; +import org.apache.commons.lang3.StringUtils; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.regex.Pattern; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -43,12 +44,13 @@ public final class Fingerprint { private static final char KVDELIMTER = '|'; private static final char TAGDELIMTER = ' '; - private static final Pattern SANITIZE_REGEX = Pattern.compile("[" + KVDELIMTER - + TAGDELIMTER + "]"); public static final String UNDERSCORE = "_"; private final Map mValues; + private 
final String[] mSearchList = new String[] {"|", " "}; + private final String[] mReplaceList = new String[] {"_", "_"}; + /** * The possible types of the fingerprint. */ @@ -282,10 +284,11 @@ private Fingerprint(Map values) { } } - private String sanitizeString(String input) { + @VisibleForTesting + String sanitizeString(String input) { if (input == null || input.isEmpty()) { return UNDERSCORE; } - return SANITIZE_REGEX.matcher(input).replaceAll(UNDERSCORE); + return StringUtils.replaceEachRepeatedly(input, mSearchList, mReplaceList); } } diff --git a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java index 527149f2653f..6c918b51c73e 100755 --- a/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/ObjectUnderFileSystem.java @@ -31,6 +31,7 @@ import alluxio.util.io.PathUtils; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Iterators; import org.apache.http.conn.ConnectTimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,8 +50,11 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -190,6 +194,14 @@ public interface ObjectListingChunk { */ @Nullable ObjectListingChunk getNextChunk() throws IOException; + + /** + * Gets if there is more chunks to fetch WITHOUT actually fetching the next chunk. 
+ * @return true if there are, false if there aren't, null if it cannot tell + */ + default @Nullable Boolean hasNextChunk() { + return null; + } } /** @@ -592,6 +604,23 @@ public UfsStatus[] listStatus(String path, ListOptions options) return listInternal(path, options); } + @Nullable + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + final ObjectListingChunk chunk = + getObjectListingChunkForPath(path, options.isRecursive(), startAfter, batchSize); + if (chunk == null) { + String keyAsFolder = convertToFolderName(stripPrefixIfPresent(path)); + if (getObjectStatus(keyAsFolder) != null) { + // Path is an empty directory + return Collections.emptyIterator(); + } + return null; + } + return new UfsStatusIterator(path, options.isRecursive(), chunk); + } + @Override + public boolean mkdirs(String path, MkdirsOptions options) throws IOException { + if (path == null) { @@ -929,25 +958,50 @@ protected String getChildName(String child, String parent) throws IOException { * * @param key pseudo-directory key excluding header and bucket * @param recursive whether to request immediate children only, or all descendants + * @param startAfter indicates where the listing starts + * @param batchSize the batch size of each chunk * @return chunked object listing, or null if key is not found */ @Nullable - protected abstract ObjectListingChunk getObjectListingChunk(String key, boolean recursive) - throws IOException; + protected ObjectListingChunk getObjectListingChunk( + String key, boolean recursive, String startAfter, int batchSize) throws IOException { + // Some UFS haven't implemented getObjectListingChunk(dir, recursive, startAfter, batchSize) + // so fall back to the one with fewer params if startAfter and batchSize are unset.
+ if (startAfter == null && batchSize == 0) { + return getObjectListingChunk(key, recursive); + } + throw new UnsupportedOperationException("Operation not supported"); + } /** - * Gets a (partial) object listing for the given path. + * Gets a (partial) object listing result for the given key. * - * @param path of pseudo-directory + * @param key pseudo-directory key excluding header and bucket * @param recursive whether to request immediate children only, or all descendants - * @return chunked object listing, or null if the path does not exist as a pseudo-directory + * @return chunked object listing, or null if key is not found */ @Nullable + protected abstract ObjectListingChunk getObjectListingChunk(String key, boolean recursive) + throws IOException; + protected ObjectListingChunk getObjectListingChunkForPath(String path, boolean recursive) throws IOException { + return getObjectListingChunkForPath(path, recursive, null, 0); + } + + /** + * Gets a (partial) object listing for the given path. 
+ * + * @param path of pseudo-directory + * @param recursive whether to request immediate children only, or all descendants + * @return chunked object listing, or null if the path does not exist as a pseudo-directory + */ + @Nullable + protected ObjectListingChunk getObjectListingChunkForPath( + String path, boolean recursive, String startAfter, int batchSize) throws IOException { // Check if anything begins with / String dir = stripPrefixIfPresent(path); - ObjectListingChunk objs = getObjectListingChunk(dir, recursive); + ObjectListingChunk objs = getObjectListingChunk(dir, recursive, startAfter, batchSize); // If there are, this is a folder and we can create the necessary metadata if (objs != null && ((objs.getObjectStatuses() != null && objs.getObjectStatuses().length > 0) @@ -964,6 +1018,86 @@ protected ObjectListingChunk getObjectListingChunkForPath(String path, boolean r return null; } + private void populateUfsStatus( + String keyPrefix, ObjectListingChunk chunk, + boolean isRecursive, Map ufsStatusMap) throws IOException { + // Directories in UFS can be possibly encoded in two different ways: + // (1) as file objects with FOLDER_SUFFIX for directories created through Alluxio or + // (2) as "common prefixes" of other files objects for directories not created through + // Alluxio + // + // Case (1) (and file objects) is accounted for by iterating over chunk.getObjects() while + // case (2) is accounted for by iterating over chunk.getCommonPrefixes(). 
+ // + // An example, with prefix="ufs" and delimiter="/" and LISTING_LENGTH=5 + // - objects.key = ufs/, child = + // - objects.key = ufs/dir1, child = dir1 + // - objects.key = ufs/file, child = file + // - commonPrefix = ufs/dir1/, child = dir1 + // - commonPrefix = ufs/dir2/, child = dir2 + + // Handle case (1) + for (ObjectStatus status : chunk.getObjectStatuses()) { + // Remove parent portion of the key + String child = getChildName(status.getName(), keyPrefix); + if (child.isEmpty() || child.equals(getFolderSuffix())) { + // Removes results equal to the path + continue; + } + ObjectPermissions permissions = getPermissions(); + if (child.endsWith(getFolderSuffix())) { + // Child is a directory + child = CommonUtils.stripSuffixIfPresent(child, getFolderSuffix()); + ufsStatusMap.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), + permissions.getGroup(), permissions.getMode())); + } else { + // Child is a file + ufsStatusMap.put(child, + new UfsFileStatus(child, status.getContentHash(), status.getContentLength(), + status.getLastModifiedTimeMs(), permissions.getOwner(), permissions.getGroup(), + permissions.getMode(), + mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT))); + } + } + // Handle case (2) + String[] commonPrefixes; + if (isRecursive) { + // In case of a recursive listing infer pseudo-directories as the commonPrefixes returned + // from the object store is empty for an empty delimiter. + HashSet prefixes = new HashSet<>(); + for (ObjectStatus objectStatus : chunk.getObjectStatuses()) { + String objectName = objectStatus.getName(); + while (objectName.startsWith(keyPrefix) && objectName.contains(PATH_SEPARATOR)) { + objectName = objectName.substring(0, objectName.lastIndexOf(PATH_SEPARATOR)); + if (!objectName.isEmpty()) { + // include the separator with the prefix, to conform to what object stores return + // as common prefixes. 
+ prefixes.add(PathUtils.normalizePath(objectName, PATH_SEPARATOR)); + } + } + } + commonPrefixes = prefixes.toArray(new String[0]); + } else { + commonPrefixes = chunk.getCommonPrefixes(); + } + for (String commonPrefix : commonPrefixes) { + if (commonPrefix.startsWith(keyPrefix)) { + // Remove parent portion of the key + String child = getChildName(commonPrefix, keyPrefix); + // Remove any portion after the last path delimiter + int childNameIndex = child.lastIndexOf(PATH_SEPARATOR); + child = childNameIndex != -1 ? child.substring(0, childNameIndex) : child; + if (!child.isEmpty() && !ufsStatusMap.containsKey(child)) { + // If both a file and a directory existed with the same name, the path will be + // treated as a directory + ObjectPermissions permissions = getPermissions(); + ufsStatusMap.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), + permissions.getGroup(), permissions.getMode())); + } + } + } + } + /** * Get full path of root in object store. * @@ -994,81 +1128,7 @@ protected UfsStatus[] listInternal(String path, ListOptions options) throws IOEx keyPrefix = keyPrefix.equals(PATH_SEPARATOR) ? "" : keyPrefix; Map children = new HashMap<>(); while (chunk != null) { - // Directories in UFS can be possibly encoded in two different ways: - // (1) as file objects with FOLDER_SUFFIX for directories created through Alluxio or - // (2) as "common prefixes" of other files objects for directories not created through - // Alluxio - // - // Case (1) (and file objects) is accounted for by iterating over chunk.getObjects() while - // case (2) is accounted for by iterating over chunk.getCommonPrefixes(). 
- // - // An example, with prefix="ufs" and delimiter="/" and LISTING_LENGTH=5 - // - objects.key = ufs/, child = - // - objects.key = ufs/dir1, child = dir1 - // - objects.key = ufs/file, child = file - // - commonPrefix = ufs/dir1/, child = dir1 - // - commonPrefix = ufs/dir2/, child = dir2 - - // Handle case (1) - for (ObjectStatus status : chunk.getObjectStatuses()) { - // Remove parent portion of the key - String child = getChildName(status.getName(), keyPrefix); - if (child.isEmpty() || child.equals(getFolderSuffix())) { - // Removes results equal to the path - continue; - } - ObjectPermissions permissions = getPermissions(); - if (child.endsWith(getFolderSuffix())) { - // Child is a directory - child = CommonUtils.stripSuffixIfPresent(child, getFolderSuffix()); - children.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), - permissions.getGroup(), permissions.getMode())); - } else { - // Child is a file - children.put(child, - new UfsFileStatus(child, status.getContentHash(), status.getContentLength(), - status.getLastModifiedTimeMs(), permissions.getOwner(), permissions.getGroup(), - permissions.getMode(), - mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT))); - } - } - // Handle case (2) - String[] commonPrefixes; - if (options.isRecursive()) { - // In case of a recursive listing infer pseudo-directories as the commonPrefixes returned - // from the object store is empty for an empty delimiter. - HashSet prefixes = new HashSet<>(); - for (ObjectStatus objectStatus : chunk.getObjectStatuses()) { - String objectName = objectStatus.getName(); - while (objectName.startsWith(keyPrefix) && objectName.contains(PATH_SEPARATOR)) { - objectName = objectName.substring(0, objectName.lastIndexOf(PATH_SEPARATOR)); - if (!objectName.isEmpty()) { - // include the separator with the prefix, to conform to what object stores return - // as common prefixes. 
- prefixes.add(PathUtils.normalizePath(objectName, PATH_SEPARATOR)); - } - } - } - commonPrefixes = prefixes.toArray(new String[0]); - } else { - commonPrefixes = chunk.getCommonPrefixes(); - } - for (String commonPrefix : commonPrefixes) { - if (commonPrefix.startsWith(keyPrefix)) { - // Remove parent portion of the key - String child = getChildName(commonPrefix, keyPrefix); - // Remove any portion after the last path delimiter - int childNameIndex = child.lastIndexOf(PATH_SEPARATOR); - child = childNameIndex != -1 ? child.substring(0, childNameIndex) : child; - if (!child.isEmpty() && !children.containsKey(child)) { - // If both a file and a directory existed with the same name, the path will be - // treated as a directory - ObjectPermissions permissions = getPermissions(); - children.put(child, new UfsDirectoryStatus(child, permissions.getOwner(), - permissions.getGroup(), permissions.getMode())); - } - } - } + populateUfsStatus(keyPrefix, chunk, options.isRecursive(), children); chunk = chunk.getNextChunk(); } UfsStatus[] ret = new UfsStatus[children.size()]; @@ -1080,11 +1140,73 @@ protected UfsStatus[] listInternal(String path, ListOptions options) throws IOEx } /** - * Creates a directory flagged file with the key and folder suffix. - * - * @param key the key to create a folder - * @return true if the operation was successful, false otherwise + * The UFS status iterator that iterates the ufs statuses and fetches the chunk by lazy. */ + public class UfsStatusIterator implements Iterator { + private ObjectListingChunk mChunk; + private final String mKeyPrefix; + private final boolean mIsRecursive; + private Iterator mIterator = null; + private String mLastKey = null; + + /** + * Creates the iterator. 
+ * @param path the path + * @param isRecursive if the listing is recursive + * @param firstChunk the first object listing chunk + */ + public UfsStatusIterator(String path, boolean isRecursive, ObjectListingChunk firstChunk) + throws IOException { + String keyPrefix = PathUtils.normalizePath(stripPrefixIfPresent(path), PATH_SEPARATOR); + keyPrefix = keyPrefix.equals(PATH_SEPARATOR) ? "" : keyPrefix; + mKeyPrefix = keyPrefix; + mIsRecursive = isRecursive; + mChunk = firstChunk; + updateIterator(); + } + + private void updateIterator() throws IOException { + NavigableMap ufsStatusMap = new TreeMap<>(); + populateUfsStatus(mKeyPrefix, mChunk, mIsRecursive, ufsStatusMap); + if (mLastKey != null) { + ufsStatusMap = ufsStatusMap.tailMap(mLastKey, false); + } + mIterator = Iterators.transform(ufsStatusMap.entrySet().iterator(), Map.Entry::getValue); + mLastKey = ufsStatusMap.isEmpty() ? null : ufsStatusMap.lastKey(); + } + + @Override + public boolean hasNext() { + if (mChunk == null) { + return false; + } + if (mIterator.hasNext()) { + return true; + } + if (Boolean.FALSE.equals(mChunk.hasNextChunk())) { + return false; + } + try { + mChunk = mChunk.getNextChunk(); + updateIterator(); + return hasNext(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public UfsStatus next() { + return mIterator.next(); + } + } + + /** + * Creates a directory flagged file with the key and folder suffix. 
+ * + * @param key the key to create a folder + * @return true if the operation was successful, false otherwise + */ protected boolean mkdirsInternal(String key) { return createEmptyObject(convertToFolderName(stripPrefixIfPresent(key))); } diff --git a/core/common/src/main/java/alluxio/underfs/UfsClient.java b/core/common/src/main/java/alluxio/underfs/UfsClient.java new file mode 100644 index 000000000000..bd79d82acdcc --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/UfsClient.java @@ -0,0 +1,52 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.file.options.DescendantType; +import alluxio.util.RateLimiter; + +import java.util.function.Consumer; +import javax.annotation.Nullable; + +/** + * The async UFS client interface. + */ +public interface UfsClient { + + /** + * Lists the ufs statuses for a given path. The {@link UfsStatus#getName()} + * function for the returned values should include the full path of each + * item from the UFS root (not including the bucket name for object stores). + * It differs from a traditional listing in that if the input variable + * checkStatus is true, the {@link UfsStatus} for the base path should + * be included at the start of the results. The function should return + * immediately, and perform the operation asynchronously. 
+ * @param path the path in ufs + * @param continuationToken the continuation token + * @param startAfter the start after string where the loading starts from + * @param descendantType the load descendant type (NONE/ONE/ALL) + * @param checkStatus if true the call will perform a GetStatus on the path + * to see if an object exists, which should be returned + * as part of the result + * @param onComplete the callback when the load is complete + * @param onError the callback when the load encountered an error + */ + void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError); + + /** + * @return the rate limiter + */ + RateLimiter getRateLimiter(); +} diff --git a/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java b/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java new file mode 100644 index 000000000000..e11da19d1fa4 --- /dev/null +++ b/core/common/src/main/java/alluxio/underfs/UfsLoadResult.java @@ -0,0 +1,110 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.AlluxioURI; + +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + The UfsLoadResult represents the result of a load operation + on an Under File System (UFS). 
+ It contains information about the loaded items, such as the count, + whether it is truncated or not, and the continuation token. + */ +public class UfsLoadResult { + + private final Stream mItems; + private final String mContinuationToken; + private final boolean mIsTruncated; + private final int mItemsCount; + private final AlluxioURI mLastItem; + private final boolean mFirstIsFile; + private final boolean mIsObjectStore; + + /** + * Constructs a new instance of {@link UfsLoadResult}. + * + * @param items the stream of loaded items + * @param itemsCount the count of loaded items + * @param continuationToken the continuation token for loading more items + * @param lastItem the URI of the last item that was loaded + * @param isTruncated whether the load operation was truncated due to reaching a limit + * @param firstIsFile whether the first item in the stream is a file + * @param isObjectStore whether the under file system is an object store + */ + public UfsLoadResult( + Stream items, int itemsCount, @Nullable String continuationToken, + @Nullable AlluxioURI lastItem, boolean isTruncated, boolean firstIsFile, + boolean isObjectStore) { + mItems = items; + mContinuationToken = continuationToken; + mIsTruncated = isTruncated; + mItemsCount = itemsCount; + mLastItem = lastItem; + mFirstIsFile = firstIsFile; + mIsObjectStore = isObjectStore; + } + + /** + * @return true if the under file system is an object store, false otherwise + */ + public boolean isIsObjectStore() { + return mIsObjectStore; + } + + /** + * @return true if the first item in the stream is a file, false otherwise + */ + public boolean isFirstFile() { + return mFirstIsFile; + } + + /** + * @return an optional containing the URI of the last item that was loaded, + * or empty if no items were loaded + */ + public Optional getLastItem() { + return Optional.ofNullable(mLastItem); + } + + /** + * @return the count of loaded items + */ + public int getItemsCount() { + return mItemsCount; + } + + /** + * 
@return true if the load operation was truncated, false otherwise + */ + public boolean isTruncated() { + return mIsTruncated; + } + + /** + * @return the stream of loaded items + */ + public Stream getItems() { + return mItems; + } + + /** + * @return the continuation token for loading more items, + * or null if there are no more items to load + */ + public String getContinuationToken() { + return mContinuationToken; + } +} diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java index a680f0ff9c31..4d13b422aa15 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystem.java @@ -38,6 +38,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Map; import javax.annotation.Nullable; @@ -57,7 +58,7 @@ @PublicApi @ThreadSafe // TODO(adit); API calls should use a URI instead of a String wherever appropriate -public interface UnderFileSystem extends Closeable { +public interface UnderFileSystem extends Closeable, UfsClient { /** * The factory for the {@link UnderFileSystem}. */ @@ -638,6 +639,20 @@ default UfsFileStatus getFileStatus(String path) throws IOException { @Nullable UfsStatus[] listStatus(String path, ListOptions options) throws IOException; + /** + * Lists the ufs statuses iteratively. + * + * @param path the abstract pathname to list + * @param options for list directory + * @param startAfter the start after token + * @param batchSize the batch size + * @return An iterator of ufs status. Returns + * {@code null} if this abstract pathname does not denote a directory. + */ + @Nullable + Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException; + /** * Creates the directory named by this abstract pathname. 
If the folder already exists, the method * returns false. The method creates any necessary but nonexistent parent directories. diff --git a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java index 0cf021796834..54377d172982 100755 --- a/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java +++ b/core/common/src/main/java/alluxio/underfs/UnderFileSystemWithLogging.java @@ -17,7 +17,9 @@ import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.InternalRuntimeException; import alluxio.exception.status.UnimplementedException; +import alluxio.file.options.DescendantType; import alluxio.metrics.Metric; import alluxio.metrics.MetricInfo; import alluxio.metrics.MetricsSystem; @@ -32,18 +34,22 @@ import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; import alluxio.util.SecurityUtils; import com.codahale.metrics.Timer; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterators; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import javax.annotation.Nullable; /** @@ -817,6 +823,38 @@ public String toString() { }); } + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, + int batchSize) throws IOException { + return call(new UfsCallable>() { + @Override + public Iterator call() throws IOException { + Iterator result = + mUnderFileSystem.listStatusIterable(path, options, startAfter, batchSize); + return filterInvalidPaths(result, path); + } + + @Override + public String methodName() { + 
return "ListStatusIterable"; + } + + @Override + public String toString() { + return String.format("path=%s, options=%s", path, options); + } + }); + } + + @Nullable + Iterator filterInvalidPaths(Iterator statuses, String listedPath) { + if (statuses == null) { + return null; + } + return Iterators.filter(statuses, (it) -> !it.getName().contains("?")); + } + @Nullable private UfsStatus[] filterInvalidPaths(UfsStatus[] statuses, String listedPath) { // This is a temporary fix to prevent us from choking on paths containing '?'. @@ -1226,6 +1264,42 @@ public UnderFileSystem getUnderFileSystem() { return mUnderFileSystem; } + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + try { + call(new UfsCallable() { + @Override + public Void call() { + mUnderFileSystem.performListingAsync(path, continuationToken, startAfter, + descendantType, checkStatus, onComplete, onError); + return null; + } + + @Override + public String methodName() { + return "PerformListingAsync"; + } + + @Override + public String toString() { + return String.format("path=%s, continuationToken=%s, startAfter=%s, descendantType=%s," + + " checkStatus=%s", + path, continuationToken, startAfter, descendantType, checkStatus); + } + }); + } catch (IOException e) { + throw new InternalRuntimeException("should not reach"); + } + } + + @Override + public RateLimiter getRateLimiter() { + return mUnderFileSystem.getRateLimiter(); + } + /** * Interface representing a callable to the under storage system which throws an * {@link IOException} if an error occurs during the external communication. 
diff --git a/core/common/src/main/java/alluxio/util/IteratorUtils.java b/core/common/src/main/java/alluxio/util/IteratorUtils.java new file mode 100644 index 000000000000..a36fc8e8ea59 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/IteratorUtils.java @@ -0,0 +1,32 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import java.util.Iterator; + +/** + * Util for iterators. + */ +public class IteratorUtils { + /** + * @param iterator the iterator + * @return the next element in the iterator or null if hasNext() returns false + * @param the type of elements returned by the iterator + */ + public static T nextOrNull(Iterator iterator) { + if (iterator.hasNext()) { + return iterator.next(); + } + return null; + } +} + diff --git a/core/common/src/main/java/alluxio/util/RateLimiter.java b/core/common/src/main/java/alluxio/util/RateLimiter.java new file mode 100644 index 000000000000..c2fd231c4747 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/RateLimiter.java @@ -0,0 +1,60 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import java.util.Optional; + +/** + * Used to limit the rate of operations. This rate limiter is not thread safe + * and the operations are non-blocking. It is used by acquiring a permit for + * each operation, then checking how the operation should wait by calling + * {@link RateLimiter#getWaitTimeNanos(long)}. + */ +public interface RateLimiter { + + /** + * Acquire a permit for the next operation. + * @return {@link Optional#empty()} if no waiting is needed, otherwise + * the value contained in the returned optional is the permit, which + * can be used in calls to {@link RateLimiter#getWaitTimeNanos} + * to see how long to wait for the operation to be ready. + */ + Optional acquire(); + + /** + * Checks how long is needed to wait for this permit to be ready. + * @param permit the permit returned by {@link RateLimiter#acquire()} + * @return the amount of time needed to wait in nanoseconds + */ + long getWaitTimeNanos(long permit); + + /** + * @param permitsPerSecond permits per second + * @return a rate limiter + */ + static RateLimiter createRateLimiter(long permitsPerSecond) { + if (permitsPerSecond <= 0) { + return new RateLimiter() { + @Override + public Optional acquire() { + return Optional.empty(); + } + + @Override + public long getWaitTimeNanos(long permit) { + return 0; + } + }; + } + return new SimpleRateLimiter(permitsPerSecond); + } +} diff --git a/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java b/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java new file mode 100644 index 000000000000..fb2959371155 --- /dev/null +++ b/core/common/src/main/java/alluxio/util/SimpleRateLimiter.java @@ -0,0 +1,65 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Ticker; + +import java.time.Duration; +import java.util.Optional; + +/** + * A basic implementation of {@link RateLimiter}. + */ +public class SimpleRateLimiter implements RateLimiter { + + final Ticker mTicker; + final long mMinDuration; + + long mLastAcquire = 0; + + SimpleRateLimiter(long permitsPerSecond) { + this(permitsPerSecond, new Ticker() { + @Override + public long read() { + return System.nanoTime(); + } + }); + } + + /** + * Creates a simple rate limiter for testing purpose. 
+ * @param permitsPerSecond permits per second + * @param ticker the ticker + */ + @VisibleForTesting + public SimpleRateLimiter(long permitsPerSecond, Ticker ticker) { + mTicker = ticker; + mMinDuration = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + } + + @Override + public long getWaitTimeNanos(long permit) { + return permit - mTicker.read(); + } + + @Override + public Optional acquire() { + long nxtElapsed = mTicker.read(); + if (nxtElapsed - mLastAcquire >= mMinDuration) { + mLastAcquire = nxtElapsed; + return Optional.empty(); + } + mLastAcquire += mMinDuration; + return Optional.of(mLastAcquire); + } +} diff --git a/core/common/src/main/java/alluxio/util/io/PathUtils.java b/core/common/src/main/java/alluxio/util/io/PathUtils.java index 0d3069b69182..17dcbd3e50e0 100644 --- a/core/common/src/main/java/alluxio/util/io/PathUtils.java +++ b/core/common/src/main/java/alluxio/util/io/PathUtils.java @@ -346,9 +346,27 @@ public static String subtractPaths(String path, String prefix) throws InvalidPat * @throws InvalidPathException when the path or prefix is invalid */ public static boolean hasPrefix(String path, String prefix) throws InvalidPathException { + return hasPrefix(path, prefix, true); + } + + /** + * Checks whether the given path contains the given prefix. The comparison happens at a component + * granularity; for example, {@code hasPrefix(/dir/file, /dir)} should evaluate to true, while + * {@code hasPrefix(/dir/file, /d)} should evaluate to false. + * + * @param path a path + * @param prefix a prefix + * @param cleanPath if the paths should be cleaned + * @return whether the given path has the given prefix + * @throws InvalidPathException when the path or prefix is invalid + */ + public static boolean hasPrefix(String path, String prefix, boolean cleanPath) + throws InvalidPathException { // normalize path and prefix(e.g. 
"/a/b/../c" -> "/a/c", "/a/b/" --> "/a/b") - path = cleanPath(path); - prefix = cleanPath(prefix); + if (cleanPath) { + path = cleanPath(path); + prefix = cleanPath(prefix); + } if (prefix.equals("/")) { return true; @@ -445,6 +463,18 @@ public static String normalizePath(String path, String separator) { return path.endsWith(separator) ? path : path + separator; } + /** + * Adds a starting separator if it does not exist in path. + * + * @param path the file name + * @param separator trailing separator to add + * @return updated path with trailing separator + */ + public static String normalizePathStart( + String path, String separator) { + return path.startsWith(separator) ? path : separator + path; + } + private PathUtils() {} // prevent instantiation /** diff --git a/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java b/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java index fdb8a53eb49e..62c8d6690af7 100644 --- a/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java +++ b/core/common/src/test/java/alluxio/conf/path/TrieNodeTest.java @@ -11,6 +11,8 @@ package alluxio.conf.path; +import static org.junit.Assert.assertEquals; + import com.google.common.collect.ImmutableList; import com.google.common.collect.Streams; import org.junit.Assert; @@ -74,16 +76,16 @@ public void searchExact() { TrieNode d = node.insert("/c/d"); TrieNode g = node.insert("/c/g"); TrieNode h = node.insert("/u/h"); - Assert.assertEquals(a, node.searchExact("/a").get()); - Assert.assertEquals(b, node.searchExact("/a/b").get()); - Assert.assertEquals(f, node.searchExact("/a/e/f").get()); - Assert.assertEquals(d, node.searchExact("/c/d").get()); - Assert.assertEquals(g, node.searchExact("/c/g").get()); - Assert.assertEquals(h, node.searchExact("/u/h").get()); - Assert.assertEquals(Optional.empty(), node.searchExact("/")); - Assert.assertEquals(Optional.empty(), node.searchExact("/ab")); - Assert.assertEquals(Optional.empty(), node.searchExact("/a/b/c")); - 
Assert.assertEquals(Optional.empty(), node.searchExact("/a/d")); + assertEquals(a, node.searchExact("/a").get()); + assertEquals(b, node.searchExact("/a/b").get()); + assertEquals(f, node.searchExact("/a/e/f").get()); + assertEquals(d, node.searchExact("/c/d").get()); + assertEquals(g, node.searchExact("/c/g").get()); + assertEquals(h, node.searchExact("/u/h").get()); + assertEquals(Optional.empty(), node.searchExact("/")); + assertEquals(Optional.empty(), node.searchExact("/ab")); + assertEquals(Optional.empty(), node.searchExact("/a/b/c")); + assertEquals(Optional.empty(), node.searchExact("/a/d")); } @Test @@ -97,17 +99,17 @@ public void deleteIfTrue() { TrieNode h = node.insert("/u/h"); Assert.assertTrue(node.search("/a/b").contains(b)); TrieNode b2 = node.deleteIf("/a/b", n -> { - Assert.assertEquals(b, n); + assertEquals(b, n); return true; }); - Assert.assertEquals(b, b2); + assertEquals(b, b2); Assert.assertFalse(node.search("/a/b").contains(b)); Assert.assertTrue(node.search("/a").contains(a)); TrieNode a2 = node.deleteIf("/a", n -> { - Assert.assertEquals(a, n); + assertEquals(a, n); return true; }); - Assert.assertEquals(a, a2); + assertEquals(a, a2); Assert.assertFalse(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/e/f").contains(f)); TrieNode c2 = node.deleteIf("/c", n -> true); @@ -115,10 +117,10 @@ public void deleteIfTrue() { Assert.assertTrue(node.search("/c/d").contains(d)); Assert.assertTrue(node.search("/c/g").contains(g)); TrieNode h2 = node.deleteIf("/u/h", n -> { - Assert.assertEquals(h, n); + assertEquals(h, n); return true; }); - Assert.assertEquals(h, h2); + assertEquals(h, h2); TrieNode nil = node.deleteIf("/n", n -> { Assert.fail(); return true; @@ -147,10 +149,10 @@ public void deleteAndInsert() { Assert.assertTrue(node.search("/a/b").contains(b)); TrieNode b2 = node.deleteIf("/a/b", n -> { - Assert.assertEquals(b, n); + assertEquals(b, n); return true; }); - Assert.assertEquals(b, b2); + assertEquals(b, b2); 
Assert.assertFalse(node.search("/a/b").contains(b)); TrieNode b3 = node.insert("/a/b"); Assert.assertTrue(node.search("/a/b").contains(b3)); @@ -158,10 +160,10 @@ public void deleteAndInsert() { Assert.assertTrue(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/b").contains(a)); TrieNode a2 = node.deleteIf("/a", n -> { - Assert.assertEquals(a, n); + assertEquals(a, n); return true; }); - Assert.assertEquals(a, a2); + assertEquals(a, a2); Assert.assertFalse(node.search("/a/b").contains(a)); Assert.assertFalse(node.search("/a").contains(a)); Assert.assertTrue(node.search("/a/b").contains(b3)); @@ -188,7 +190,7 @@ public void getChildren() { node.getLeafChildren("/a/e/f").toArray(TrieNode[]::new)); Assert.assertArrayEquals(new TrieNode[] {d}, node.getLeafChildren("/c/d").toArray(TrieNode[]::new)); - Assert.assertEquals(new HashSet(Arrays.asList(a, b, f, d, g, h)), + assertEquals(new HashSet(Arrays.asList(a, b, f, d, g, h)), node.getLeafChildren("/").collect(Collectors.toSet())); } @@ -205,8 +207,20 @@ public void clearTrie() { node.clear(); // after clearing, each node should only contain itself for (TrieNode nxt : ImmutableList.of(a, b, f, d, g, h)) { - Assert.assertEquals(Collections.singletonList(nxt), + assertEquals(Collections.singletonList(nxt), nxt.getLeafChildren("/").collect(Collectors.toList())); } } + + @Test + public void getLeafChildrenOnRoot() { + TrieNode node = new TrieNode<>(); + TrieNode a = node.insert("/a"); + TrieNode b = node.insert("/a/b"); + TrieNode f = node.insert("/a/e/f"); + TrieNode d = node.insert("/c/d"); + TrieNode g = node.insert("/c/g"); + TrieNode h = node.insert("/u/h"); + assertEquals(6, node.getLeafChildren("/").toArray().length); + } } diff --git a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java index 13eddf51c41c..9f0ddee1efb6 100644 --- a/core/common/src/test/java/alluxio/underfs/FingerprintTest.java +++ 
b/core/common/src/test/java/alluxio/underfs/FingerprintTest.java @@ -144,4 +144,14 @@ public void createACLFingeprint() { Fingerprint.parse(expected).getTag(Fingerprint.Tag.ACL)); assertEquals(expected, Fingerprint.parse(expected).serialize()); } + + @Test + public void sanitizeString() { + Fingerprint dummy = Fingerprint.INVALID_FINGERPRINT; + assertEquals("foobar", dummy.sanitizeString("foobar")); + assertEquals("foo_bar", dummy.sanitizeString("foo bar")); + assertEquals("foo_bar", dummy.sanitizeString("foo|bar")); + assertEquals("foo_bar_baz", dummy.sanitizeString("foo bar|baz")); + assertEquals("foo_bar_baz_qux", dummy.sanitizeString("foo bar baz qux")); + } } diff --git a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java index 287abb1690f0..eb0c1e6bf23d 100644 --- a/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java +++ b/core/common/src/test/java/alluxio/underfs/ObjectUnderFileSystemTest.java @@ -11,6 +11,7 @@ package alluxio.underfs; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -19,8 +20,11 @@ import alluxio.conf.AlluxioConfiguration; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; +import alluxio.underfs.options.ListOptions; import com.google.common.collect.ImmutableMap; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.mockito.Mockito; @@ -28,6 +32,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.SocketException; +import java.util.stream.Collectors; public class ObjectUnderFileSystemTest { private static final AlluxioConfiguration CONF = Configuration.global(); @@ -69,4 +74,47 @@ public void testRetryOnException() { fail(); } } + + @Test + public void testListObjectStorageDescendantTypeNone() throws Throwable { + 
mObjectUFS = new MockObjectUnderFileSystem(new AlluxioURI("/"), + UnderFileSystemConfiguration.defaults(CONF)) { + final UfsStatus mF1Status = new UfsFileStatus("f1", "", 0L, 0L, "", "", (short) 0777, 0L); + final UfsStatus mF2Status = new UfsFileStatus("f2", "", 1L, 0L, "", "", (short) 0777, 0L); + + @Override + public UfsStatus getStatus(String path) throws IOException { + if (path.equals("root/f1")) { + return mF1Status; + } else if (path.equals("root/f2")) { + return mF2Status; + } + throw new FileNotFoundException(); + } + + @Override + public UfsStatus[] listStatus(String path) throws IOException { + if (path.equals("root") || path.equals("root/")) { + return new UfsStatus[] {mF1Status, mF2Status}; + } + return new UfsStatus[0]; + } + + @Override + public UfsStatus[] listStatus(String path, ListOptions options) throws IOException { + return listStatus(path); + } + + @Override + protected ObjectPermissions getPermissions() { + return new ObjectPermissions("foo", "bar", (short) 0777); + } + }; + + UfsLoadResult result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mObjectUFS, "root", DescendantType.NONE); + Assert.assertEquals(1, result.getItemsCount()); + UfsStatus status = result.getItems().collect(Collectors.toList()).get(0); + assertEquals("root", status.getName()); + } } diff --git a/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java b/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java new file mode 100644 index 000000000000..5b52f6cdd9d2 --- /dev/null +++ b/core/common/src/test/java/alluxio/underfs/UnderFileSystemTestUtil.java @@ -0,0 +1,49 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs; + +import alluxio.file.options.DescendantType; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Test utils for UFS. + */ +public class UnderFileSystemTestUtil { + /** + * A test helper convert the async performListingAsync call to a sync one. + * @param ufs the ufs object + * @param path the path + * @param descendantType the descendant type + * @return the ufs load result + */ + public static UfsLoadResult performListingAsyncAndGetResult( + UnderFileSystem ufs, String path, DescendantType descendantType) throws Throwable { + CountDownLatch latch = new CountDownLatch(1); + AtomicReference throwable = new AtomicReference<>(); + AtomicReference result = new AtomicReference<>(); + ufs.performListingAsync(path, null, null, descendantType, descendantType == DescendantType.NONE, + (r) -> { + result.set(r); + latch.countDown(); + }, (t) -> { + throwable.set(t); + latch.countDown(); + }); + latch.await(); + if (throwable.get() != null) { + throw throwable.get(); + } + return result.get(); + } +} diff --git a/core/common/src/test/java/alluxio/util/RateLimiterTest.java b/core/common/src/test/java/alluxio/util/RateLimiterTest.java new file mode 100644 index 000000000000..f7d4e05a616a --- /dev/null +++ b/core/common/src/test/java/alluxio/util/RateLimiterTest.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.common.base.Ticker; +import org.junit.Before; +import org.junit.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class RateLimiterTest { + + private final Ticker mTicker = new Ticker() { + @Override + public long read() { + return mTime; + } + }; + + private long mTime; + + @Before + public void before() { + mTime = 0; + } + + @Test + public void testFastRequests() { + long permitsPerSecond = 10; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + SimpleRateLimiter rateLimiter = new SimpleRateLimiter(permitsPerSecond, mTicker); + + // if the timer is moving as fast as the permits then there should be no waiting + for (int i = 0; i < 10; i++) { + mTime += timePerPermit; + assertFalse(rateLimiter.acquire().isPresent()); + } + // if we move forward a large amount, we should still only get 1 new permit + mTime += timePerPermit * 100; + assertFalse(rateLimiter.acquire().isPresent()); + assertTrue(rateLimiter.acquire().isPresent()); + + mTime += timePerPermit; + assertTrue(rateLimiter.acquire().isPresent()); + + mTime += timePerPermit * 2; + assertFalse(rateLimiter.acquire().isPresent()); + + Optional permit = rateLimiter.acquire(); + assertTrue(permit.isPresent()); + mTime += timePerPermit; + assertEquals(mTime, (long) permit.get()); + } + + @Test + public void testSlowRequests() { + long 
permitsPerSecond = 10; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + SimpleRateLimiter rateLimiter = new SimpleRateLimiter(permitsPerSecond, mTicker); + List permits = new ArrayList<>(); + for (int i = 0; i < permitsPerSecond; i++) { + Optional permit = rateLimiter.acquire(); + assertTrue(permit.isPresent()); + permits.add(permit.get()); + } + assertEquals(Duration.ofSeconds(1).toNanos(), (long) permits.get(permits.size() - 1)); + for (int i = 0; i < permitsPerSecond; i++) { + mTime += timePerPermit; + assertEquals(0, rateLimiter.getWaitTimeNanos(permits.get(i))); + } + } +} diff --git a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java index 7989971b78bd..450c25302a7d 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java +++ b/core/server/common/src/main/java/alluxio/master/journal/FileSystemMergeJournalContext.java @@ -42,7 +42,7 @@ public class FileSystemMergeJournalContext implements JournalContext { private static final Logger LOG = LoggerFactory.getLogger(FileSystemMergeJournalContext.class); - private final JournalContext mJournalContext; + protected final JournalContext mJournalContext; protected final JournalEntryMerger mJournalEntryMerger; /** diff --git a/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java b/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java index d3dfcc679dda..6099592b8a89 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java +++ b/core/server/common/src/main/java/alluxio/master/journal/MetadataSyncMergeJournalContext.java @@ -11,6 +11,8 @@ package alluxio.master.journal; +import alluxio.exception.status.UnavailableException; + import com.google.common.annotations.VisibleForTesting; import 
javax.annotation.concurrent.NotThreadSafe; @@ -43,6 +45,9 @@ public MetadataSyncMergeJournalContext( super(journalContext, journalEntryMerger); } + /** + * Flushes the journals into the async journal writer. + */ @Override public void flush() { appendMergedJournals(); @@ -55,6 +60,14 @@ public void close() { // the rpc thread. } + /** + * Flushes and commits journals. + */ + public void hardFlush() throws UnavailableException { + appendMergedJournals(); + mJournalContext.flush(); + } + /** * @return the journal merger, used in unit test */ diff --git a/core/server/master/pom.xml b/core/server/master/pom.xml index 84a81926050d..36c5b7d4576c 100644 --- a/core/server/master/pom.xml +++ b/core/server/master/pom.xml @@ -30,6 +30,10 @@ + + software.amazon.awssdk + s3 + com.google.guava guava @@ -123,9 +127,15 @@ test - io.findify - s3mock_2.13 + org.gaul + s3proxy test + + + ch.qos.logback + logback-classic + + diff --git a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java index 061b3348a7a1..1414a02cd2be 100644 --- a/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/BlockMaster.java @@ -193,6 +193,16 @@ default void commitBlockInUFS(long blockId, long length) throws UnavailableExcep */ void commitBlockInUFS(long blockId, long length, JournalContext context); + /** + * Marks a block as committed, but without a worker location. This means the block is only in ufs. + * Append any created journal entries to the included context. 
+ * @param blockId the id of the block to commit + * @param length the length of the block + * @param context the journal context + * @param checkExists checks if the block exists + */ + void commitBlockInUFS(long blockId, long length, JournalContext context, boolean checkExists); + /** * @param blockId the block id to get information for * @return the {@link BlockInfo} for the given block id diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index 0a87d0086797..d501526ea8b8 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -1174,9 +1174,15 @@ public void commitBlock(long workerId, long usedBytesOnTier, String tierAlias, @Override public void commitBlockInUFS(long blockId, long length, JournalContext journalContext) { + commitBlockInUFS(blockId, length, journalContext, true); + } + + @Override + public void commitBlockInUFS( + long blockId, long length, JournalContext journalContext, boolean checkExists) { LOG.debug("Commit block in ufs. blockId: {}, length: {}", blockId, length); try (LockResource r = lockBlock(blockId)) { - if (mBlockMetaStore.getBlock(blockId).isPresent()) { + if (checkExists && mBlockMetaStore.getBlock(blockId).isPresent()) { // Block metadata already exists, so do not need to create a new one. 
return; } diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index eaf8193f483a..ac8230977032 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -40,6 +40,7 @@ import alluxio.exception.InvalidFileSizeException; import alluxio.exception.InvalidPathException; import alluxio.exception.UnexpectedAlluxioException; +import alluxio.exception.runtime.NotFoundRuntimeException; import alluxio.exception.status.FailedPreconditionException; import alluxio.exception.status.InvalidArgumentException; import alluxio.exception.status.NotFoundException; @@ -47,9 +48,11 @@ import alluxio.exception.status.ResourceExhaustedException; import alluxio.exception.status.UnavailableException; import alluxio.file.options.DescendantType; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.DeletePOptions; import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcService; import alluxio.grpc.GrpcUtils; import alluxio.grpc.LoadDescendantPType; @@ -59,6 +62,8 @@ import alluxio.grpc.ServiceType; import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.TtlAction; import alluxio.heartbeat.FixedIntervalSupplier; import alluxio.heartbeat.HeartbeatContext; @@ -92,7 +97,10 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.contexts.SyncMetadataContext; import alluxio.master.file.contexts.WorkerHeartbeatContext; +import 
alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.TaskGroup; import alluxio.master.file.meta.FileSystemMasterView; import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeDirectory; @@ -215,6 +223,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.Spliterators; @@ -448,6 +458,8 @@ public class DefaultFileSystemMaster extends CoreMaster ThreadFactoryUtils.build("alluxio-ufs-active-sync-%d", false)); private HeartbeatThread mReplicationCheckHeartbeatThread; + private final DefaultSyncProcess mDefaultSyncProcess; + /** * Creates a new instance of {@link DefaultFileSystemMaster}. * @@ -520,6 +532,8 @@ public Type getType() { FileSystemContext schedulerFsContext = FileSystemContext.create(); JournaledJobMetaStore jobMetaStore = new JournaledJobMetaStore(this); mScheduler = new Scheduler(new DefaultWorkerProvider(this, schedulerFsContext), jobMetaStore); + mDefaultSyncProcess = createSyncProcess( + mInodeStore, mMountTable, mInodeTree, getSyncPathCache()); // The mount table should come after the inode tree because restoring the mount table requires // that the inode tree is already restored. 
@@ -1140,7 +1154,7 @@ public void listStatus(AlluxioURI path, ListStatusContext context, boolean run = true; while (run) { run = false; - if (loadMetadata) { + if (loadMetadata && !context.isDisableMetadataSync()) { loadMetadataIfNotExist(rpcContext, path, loadMetadataContext); ufsAccessed = true; } @@ -1158,7 +1172,7 @@ public void listStatus(AlluxioURI path, ListStatusContext context, auditContext.setAllowed(false); throw e; } - if (!loadMetadata) { + if (!loadMetadata && !context.isDisableMetadataSync()) { Inode inode; boolean isLoaded = true; if (inodePath.fullPathExists()) { @@ -1336,7 +1350,7 @@ private void listStatusInternal( try (LockedInodePath childInodePath = currInodePath.lockChildByName( - childName, LockPattern.READ, childComponentsHint)) { + childName, LockPattern.READ, childComponentsHint, true)) { listStatusInternal(context, rpcContext, childInodePath, auditContext, nextDescendantType, resultStream, depth + 1, counter, partialPath, prefixComponents); @@ -1698,6 +1712,50 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) } } + /** + * Creates a completed file for metadata sync. + * This method is more efficient than a combination of individual + * createFile() and completeFile() methods, with less journal entries generated and + * less frequent metadata store updates. 
+ * @param rpcContext the rpc context for journaling + * @param inodePath the inode path + * @param createFileContext the create file context + * @param ufsStatus the ufs status, used to generate fingerprint + * @return the path of inodes to the created node + */ + public List createCompleteFileInternalForMetadataSync( + RpcContext rpcContext, LockedInodePath inodePath, CreateFileContext createFileContext, + UfsFileStatus ufsStatus + ) + throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + BlockInfoException, IOException { + long containerId = mBlockMaster.getNewContainerId(); + List blockIds = new ArrayList<>(); + + int sequenceNumber = 0; + long ufsLength = ufsStatus.getContentLength(); + long remainingBytes = ufsLength; + long blockSize = createFileContext.getOptions().getBlockSizeBytes(); + while (remainingBytes > 0) { + blockIds.add(BlockId.createBlockId(containerId, sequenceNumber)); + remainingBytes -= Math.min(remainingBytes, blockSize); + sequenceNumber++; + } + createFileContext.setCompleteFileInfo( + new CreateFileContext.CompleteFileInfo(containerId, ufsLength, blockIds) + ); + createFileContext.setMetadataLoad(true, false); + createFileContext.setFingerprint(getUfsFingerprint(inodePath.getUri(), ufsStatus, null)); + + // Ufs absent cache is updated in the metadata syncer when a request processing is done, + // so ufs absent cache is not updated here. + List inodes = createFileInternal(rpcContext, inodePath, createFileContext, false); + + commitBlockInfosForFile(blockIds, ufsLength, blockSize, rpcContext.getJournalContext()); + mUfsAbsentPathCache.processExisting(inodePath.getUri()); + return inodes; + } + /** * Completes a file. After a file is completed, it cannot be written to. 
* @@ -1742,21 +1800,9 @@ void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePath, String ufsFingerprint = Constants.INVALID_UFS_FINGERPRINT; if (fileInode.isPersisted()) { - UfsStatus ufsStatus = context.getUfsStatus(); - // Retrieve the UFS fingerprint for this file. - MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri()); - AlluxioURI resolvedUri = resolution.getUri(); - String ufsPath = resolvedUri.toString(); - try (CloseableResource ufsResource = resolution.acquireUfsResource()) { - UnderFileSystem ufs = ufsResource.get(); - if (ufsStatus == null) { - String contentHash = context.getOptions().hasContentHash() - ? context.getOptions().getContentHash() : null; - ufsFingerprint = ufs.getParsedFingerprint(ufsPath, contentHash).serialize(); - } else { - ufsFingerprint = Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize(); - } - } + String contentHash = context.getOptions().hasContentHash() + ? context.getOptions().getContentHash() : null; + ufsFingerprint = getUfsFingerprint(inodePath.getUri(), context.getUfsStatus(), contentHash); } completeFileInternal(rpcContext, inodePath, length, context.getOperationTimeMs(), @@ -1826,6 +1872,23 @@ private void completeFileInternal(RpcContext rpcContext, LockedInodePath inodePa Metrics.FILES_COMPLETED.inc(); } + String getUfsFingerprint( + AlluxioURI uri, @Nullable UfsStatus ufsStatus, @Nullable String contentHash) + throws InvalidPathException { + // Retrieve the UFS fingerprint for this file. 
+ MountTable.Resolution resolution = mMountTable.resolve(uri); + AlluxioURI resolvedUri = resolution.getUri(); + String ufsPath = resolvedUri.toString(); + try (CloseableResource ufsResource = resolution.acquireUfsResource()) { + UnderFileSystem ufs = ufsResource.get(); + if (ufsStatus == null) { + return ufs.getParsedFingerprint(ufsPath, contentHash).serialize(); + } else { + return Fingerprint.create(ufs.getUnderFSType(), ufsStatus).serialize(); + } + } + } + /** * Queries InodeTree's operation cache and see if this operation has recently * been applied to its persistent state. @@ -1864,7 +1927,7 @@ private void commitBlockInfosForFile(List blockIds, long fileLength, long // for the block info so that we do not have to create a new journal // context and flush again if (context != null && !(mJournalSystem instanceof UfsJournalSystem)) { - mBlockMaster.commitBlockInUFS(blockId, currentBlockSize, context); + mBlockMaster.commitBlockInUFS(blockId, currentBlockSize, context, false); } else { mBlockMaster.commitBlockInUFS(blockId, currentBlockSize); } @@ -1920,7 +1983,7 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) checkUfsMode(path, OperationType.WRITE); } deleteFileIfOverwrite(rpcContext, inodePath, context); - createFileInternal(rpcContext, inodePath, context); + createFileInternal(rpcContext, inodePath, context, true); auditContext.setSrcInode(inodePath.getInode()).setSucceeded(true); cacheOperation(context); return getFileInfoInternal(inodePath); @@ -1970,7 +2033,7 @@ private void deleteFileIfOverwrite(RpcContext rpcContext, LockedInodePath inodeP * @return the list of created inodes */ List createFileInternal(RpcContext rpcContext, LockedInodePath inodePath, - CreateFileContext context) + CreateFileContext context, boolean updateUfsAbsentCache) throws InvalidPathException, FileAlreadyExistsException, BlockInfoException, IOException, FileDoesNotExistException { if (mWhitelist.inList(inodePath.getUri().toString())) { @@ -1982,7 +2045,9 
@@ List createFileInternal(RpcContext rpcContext, LockedInodePath inodePath, if (context.isPersisted()) { // The path exists in UFS, so it is no longer absent. The ancestors exist in UFS, but the // actual file does not exist in UFS yet. - mUfsAbsentPathCache.processExisting(inodePath.getUri().getParent()); + if (updateUfsAbsentCache) { + mUfsAbsentPathCache.processExisting(inodePath.getUri().getParent()); + } } else { MountTable.Resolution resolution = mMountTable.resolve(inodePath.getUri()); Metrics.getUfsOpsSavedCounter(resolution.getUfsMountPointUri(), @@ -2205,9 +2270,11 @@ public void delete(AlluxioURI path, DeleteContext context) * @param inodePath the file {@link LockedInodePath} * @param deleteContext the method optitions * @param bypassPermCheck whether the permission check has been done before entering this call + * @return the number of inodes deleted, and the number of inodes skipped that were unable + * to be deleted */ @VisibleForTesting - public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, + public Pair deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, DeleteContext deleteContext, boolean bypassPermCheck) throws FileDoesNotExistException, IOException, DirectoryNotEmptyException, InvalidPathException { Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE); @@ -2215,14 +2282,21 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, // TODO(jiri): A crash after any UFS object is deleted and before the delete operation is // journaled will result in an inconsistency between Alluxio and UFS. 
if (!inodePath.fullPathExists()) { - return; + return new Pair<>(0, 0); } long opTimeMs = mClock.millis(); Inode inode = inodePath.getInode(); if (inode == null) { - return; + return new Pair<>(0, 0); } + if (deleteContext.isSkipNotPersisted() && inode.isFile()) { + InodeFile inodeFile = inode.asFile(); + // skip deleting a non persisted file + if (!inodeFile.isPersisted() || !inodeFile.isCompleted()) { + return new Pair<>(0, 1); + } + } boolean recursive = deleteContext.getOptions().getRecursive(); if (inode.isDirectory() && !recursive && mInodeStore.hasChildren(inode.asDirectory())) { // inode is nonempty, and we don't want to delete a nonempty directory unless recursive is @@ -2253,12 +2327,22 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, Set unsafeParentInodes = new HashSet<>(); // Alluxio URIs (and the reason for failure) which could not be deleted List> failedUris = new ArrayList<>(); + int inodeToDeleteUnsafeCount = 0; try (LockedInodePathList descendants = mInodeTree.getDescendants(inodePath)) { // This walks the tree in a DFS flavor, first all the children in a subtree, // then the sibling trees one by one. // Therefore, we first see a parent, then all its children. 
for (LockedInodePath childPath : descendants) { + // Check if we should skip non-persisted files + if (deleteContext.isSkipNotPersisted() && childPath.getInode().isFile()) { + InodeFile childInode = childPath.getInode().asFile(); + if (!childInode.isCompleted() || !childInode.isPersisted()) { + unsafeInodes.add(childInode.getId()); + unsafeParentInodes.add(childInode.getParentId()); + continue; + } + } if (bypassPermCheck) { inodesToDelete.add(new Pair<>(mInodeTree.getPath(childPath.getInode()), childPath)); } else { @@ -2343,6 +2427,7 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, // Something went wrong with this path so it cannot be removed normally // Remove the path from further processing inodesToDelete.set(i, null); + inodeToDeleteUnsafeCount++; } } @@ -2373,11 +2458,17 @@ public void deleteInternal(RpcContext rpcContext, LockedInodePath inodePath, } } - if (!failedUris.isEmpty()) { + if (!failedUris.isEmpty() && !deleteContext.isSkipNotPersisted()) { throw new FailedPreconditionException(buildDeleteFailureMessage(failedUris)); } } Metrics.PATHS_DELETED.inc(inodesToDelete.size()); + int inodeSkipped = unsafeInodes.size(); + if (!unsafeInodes.isEmpty()) { + // remove 1 because we added the parent of the path being deleted + inodeSkipped--; + } + return new Pair<>(inodesToDelete.size() - inodeToDeleteUnsafeCount, inodeSkipped); } private String buildDeleteFailureMessage(List> failedUris) { @@ -2760,7 +2851,7 @@ public long createDirectory(AlluxioURI path, CreateDirectoryContext context) * @param context method context * @return a list of created inodes */ - List createDirectoryInternal(RpcContext rpcContext, LockedInodePath inodePath, + public List createDirectoryInternal(RpcContext rpcContext, LockedInodePath inodePath, UfsManager.UfsClient ufsClient, AlluxioURI ufsUri, CreateDirectoryContext context) throws InvalidPathException, FileAlreadyExistsException, IOException, FileDoesNotExistException { 
Preconditions.checkState(inodePath.getLockPattern() == LockPattern.WRITE_EDGE); @@ -4094,6 +4185,70 @@ InodeSyncStream.SyncStatus syncMetadata(RpcContext rpcContext, AlluxioURI path, return sync.sync(); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException { + TaskGroup task = mDefaultSyncProcess.syncPath(path, + GrpcUtils.fromProto(context.getOptions().getLoadDescendantType()), + GrpcUtils.fromProto(context.getOptions().getDirectoryLoadType()), 0, null, true); + try { + task.waitAllComplete(0); + } catch (Throwable t) { + LOG.error("Sync metadata failed for task group {}", task.getGroupId(), t); + } + return SyncMetadataPResponse.newBuilder().addAllTask( + task.toProtoTasks().collect(Collectors.toList())).build(); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException { + TaskGroup result = mDefaultSyncProcess.syncPath(path, + GrpcUtils.fromProto(context.getOptions().getLoadDescendantType()), + GrpcUtils.fromProto(context.getOptions().getDirectoryLoadType()), 0, null, true); + return SyncMetadataAsyncPResponse.newBuilder() + .setSubmitted(true) + .setTaskGroupId(result.getGroupId()) + .addAllTaskIds(result.getTasks().map(it -> it.getTaskInfo().getId()) + .collect(Collectors.toSet())) + .build(); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) { + Optional task = mDefaultSyncProcess.getTaskGroup(taskGroupId); + if (!task.isPresent()) { + throw new NotFoundRuntimeException("Task group id " + taskGroupId + " not found"); + } + GetSyncProgressPResponse.Builder responseBuilder = GetSyncProgressPResponse.newBuilder(); + responseBuilder.addAllTask(task.get().toProtoTasks().collect(Collectors.toList())); + + return responseBuilder.build(); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws 
NotFoundException { + Optional group = mDefaultSyncProcess.getTaskGroup(taskGroupId); + if (!group.isPresent()) { + throw new NotFoundRuntimeException("Task group id " + taskGroupId + " not found"); + } + Optional ex = group.get().getTasks().map(baseTask -> { + try { + mDefaultSyncProcess.getTaskTracker().cancelTaskById(baseTask.getTaskInfo().getId()); + return null; + } catch (NotFoundException e) { + return e; + } + }).filter(Objects::nonNull).reduce((acc, e) -> { + acc.addSuppressed(e); + return acc; + }); + if (ex.isPresent()) { + throw ex.get(); + } + return CancelSyncMetadataPResponse.newBuilder().build(); + } + @FunctionalInterface interface PermissionCheckFunction { @@ -4157,12 +4312,13 @@ public FileSystemCommand workerHeartbeat(long workerId, List persistedFile } /** + * @param rpcContext the rpc context * @param inodePath the {@link LockedInodePath} to use * @param updateUfs whether to update the UFS with the attribute change * @param opTimeMs the operation time (in milliseconds) * @param context the method context */ - protected void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath inodePath, + public void setAttributeSingleFile(RpcContext rpcContext, LockedInodePath inodePath, boolean updateUfs, long opTimeMs, SetAttributeContext context) throws FileDoesNotExistException, InvalidPathException, AccessControlException { Inode inode = inodePath.getInode(); @@ -5393,7 +5549,11 @@ public RpcContext createRpcContext(OperationContext operationContext) operationContext.withTracker(mStateLockCallTracker)); } - private RpcContext createNonMergingJournalRpcContext(OperationContext operationContext) + /** + * @param operationContext the operation context + * @return an Rpc context that does not use a merge journal context + */ + public RpcContext createNonMergingJournalRpcContext(OperationContext operationContext) throws UnavailableException { return new RpcContext(createBlockDeletionContext(), createJournalContext(false), 
operationContext.withTracker(mStateLockCallTracker)); @@ -5468,6 +5628,19 @@ public void needsSync(AlluxioURI path) throws InvalidPathException { getSyncPathCache().notifyInvalidation(path); } + @VisibleForTesting + protected DefaultSyncProcess createSyncProcess( + ReadOnlyInodeStore inodeStore, MountTable mountTable, + InodeTree inodeTree, UfsSyncPathCache syncPathCache) { + return new DefaultSyncProcess( + this, inodeStore, mountTable, inodeTree, syncPathCache, mUfsAbsentPathCache); + } + + @VisibleForTesting + DefaultSyncProcess getMetadataSyncer() { + return mDefaultSyncProcess; + } + /** * Get scheduler. * @return scheduler diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java index b9d474b0e38a..22659e4d3e44 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMaster.java @@ -24,8 +24,13 @@ import alluxio.exception.InvalidPathException; import alluxio.exception.UnexpectedAlluxioException; import alluxio.exception.status.InvalidArgumentException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CancelSyncMetadataPResponse; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.SetAclAction; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.master.Master; import alluxio.master.file.contexts.CheckAccessContext; import alluxio.master.file.contexts.CheckConsistencyContext; @@ -42,6 +47,7 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.contexts.SyncMetadataContext; import alluxio.master.file.contexts.WorkerHeartbeatContext; import 
alluxio.master.file.meta.FileSystemMasterView; import alluxio.master.file.meta.PersistenceState; @@ -635,4 +641,37 @@ void activeSyncMetadata(AlluxioURI path, Collection changedFiles, * @param path the path to invalidate */ void needsSync(AlluxioURI path) throws InvalidPathException; + + /** + * Syncs the metadata of a given path. + * + * @param path the path to sync + * @param context the method context + * @return the sync metadata response + */ + SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException; + + /** + * Submits a metadata sync task and runs it async. + * @param path the path to sync + * @param context the method context + * @return the sync metadata async response + */ + SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataContext context) + throws InvalidPathException, IOException; + + /** + * Gets a metadata sync task progress. + * @param taskGroupId the task group id + * @return the sync progress + */ + GetSyncProgressPResponse getSyncProgress(long taskGroupId); + + /** + * Cancels an ongoing metadata sync. 
+ * @param taskGroupId the task group id + * @return the cancel sync metadata response + */ + CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) throws NotFoundException; } diff --git a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java index 3821f04302b4..3ac1c04612d7 100644 --- a/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/file/FileSystemMasterClientServiceHandler.java @@ -18,6 +18,8 @@ import alluxio.exception.AlluxioException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileDoesNotExistException; +import alluxio.grpc.CancelSyncMetadataPRequest; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPRequest; import alluxio.grpc.CheckAccessPResponse; import alluxio.grpc.CheckConsistencyPOptions; @@ -53,6 +55,8 @@ import alluxio.grpc.GetStatusPResponse; import alluxio.grpc.GetSyncPathListPRequest; import alluxio.grpc.GetSyncPathListPResponse; +import alluxio.grpc.GetSyncProgressPRequest; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.GrpcUtils; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPRequest; @@ -81,6 +85,9 @@ import alluxio.grpc.StopSyncPResponse; import alluxio.grpc.SubmitJobPRequest; import alluxio.grpc.SubmitJobPResponse; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPRequest; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPRequest; import alluxio.grpc.UnmountPResponse; import alluxio.grpc.UpdateMountPRequest; @@ -106,6 +113,7 @@ import alluxio.master.file.contexts.ScheduleAsyncPersistenceContext; import alluxio.master.file.contexts.SetAclContext; import alluxio.master.file.contexts.SetAttributeContext; +import 
alluxio.master.file.contexts.SyncMetadataContext; import alluxio.master.job.JobFactoryProducer; import alluxio.master.scheduler.Scheduler; import alluxio.recorder.Recorder; @@ -586,4 +594,46 @@ public void getJobProgress(GetJobProgressPRequest request, private AlluxioURI getAlluxioURI(String uriStr) { return new AlluxioURI(uriStr); } + + @Override + public void syncMetadata( + SyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.syncMetadata( + new AlluxioURI(request.getPath()), + SyncMetadataContext.create(request.getOptions().toBuilder())); + }, "syncMetadata", "request=%s", responseObserver, request); + } + + @Override + public void syncMetadataAsync( + SyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.syncMetadataAsync( + new AlluxioURI(request.getPath()), + SyncMetadataContext.create(request.getOptions().toBuilder())); + }, "syncMetadataAsync", "request=%s", responseObserver, request); + } + + @Override + public void getSyncProgress( + GetSyncProgressPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.getSyncProgress( + request.getTaskGroupId()); + }, "syncMetadataAsync", "request=%s", responseObserver, request); + } + + @Override + public void cancelSyncMetadata( + CancelSyncMetadataPRequest request, + StreamObserver responseObserver) { + RpcUtils.call(LOG, () -> { + return mFileSystemMaster.cancelSyncMetadata( + request.getTaskGroupId()); + }, "cancelSyncMetadata", "request=%s", responseObserver, request); + } } diff --git a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java index 32511a364296..a7422eb006b8 100644 --- a/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java +++ 
b/core/server/master/src/main/java/alluxio/master/file/InodeSyncStream.java @@ -1212,7 +1212,7 @@ void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath, .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction())); createFileContext.setWriteType(WriteType.THROUGH); // set as through since already in UFS - createFileContext.setMetadataLoad(true); + createFileContext.setMetadataLoad(true, true); createFileContext.setOwner(context.getUfsStatus().getOwner()); createFileContext.setGroup(context.getUfsStatus().getGroup()); createFileContext.setXAttr(context.getUfsStatus().getXAttr()); @@ -1247,7 +1247,7 @@ void loadFileMetadataInternal(RpcContext rpcContext, LockedInodePath inodePath, ? rpcContext : new RpcContext( rpcContext.getBlockDeletionContext(), merger, rpcContext.getOperationContext()); - fsMaster.createFileInternal(wrapRpcContext, writeLockedPath, createFileContext); + fsMaster.createFileInternal(wrapRpcContext, writeLockedPath, createFileContext, true); CompleteFileContext completeContext = CompleteFileContext.mergeFrom(CompleteFilePOptions.newBuilder().setUfsLength(ufsLength)) .setUfsStatus(context.getUfsStatus()).setMetadataLoad(true); @@ -1322,7 +1322,7 @@ private static void loadDirectoryMetadataInternal(RpcContext rpcContext, MountTa .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction())); createDirectoryContext.setMountPoint(isMountPoint); - createDirectoryContext.setMetadataLoad(true); + createDirectoryContext.setMetadataLoad(true, true); createDirectoryContext.setWriteType(WriteType.THROUGH); AccessControlList acl = null; diff --git a/core/server/master/src/main/java/alluxio/master/file/RpcContext.java b/core/server/master/src/main/java/alluxio/master/file/RpcContext.java index bc71ea478070..75e78bc04e0f 100644 --- a/core/server/master/src/main/java/alluxio/master/file/RpcContext.java +++ 
b/core/server/master/src/main/java/alluxio/master/file/RpcContext.java @@ -37,7 +37,7 @@ * guarantees about the order in which resources are closed. */ @NotThreadSafe -public final class RpcContext implements Closeable, Supplier { +public class RpcContext implements Closeable, Supplier { public static final RpcContext NOOP = new RpcContext(NoopBlockDeletionContext.INSTANCE, NoopJournalContext.INSTANCE, new InternalOperationContext()); diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java index 8de11ea14bf2..97bcd9ccd775 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/CreateFileContext.java @@ -18,14 +18,67 @@ import com.google.common.base.MoreObjects; +import java.util.List; +import javax.annotation.Nullable; + /** * Implementation of {@link OperationContext} used to merge and wrap {@link CreateFilePOptions}. */ public class CreateFileContext extends CreatePathContext { + /** + * A class for complete file info. + */ + public static class CompleteFileInfo { + /** + * Constructs an instance. + * @param containerId the file container id + * @param length the file size + * @param blockIds the block ids in the file + */ + public CompleteFileInfo(long containerId, long length, List blockIds) { + mBlockIds = blockIds; + mContainerId = containerId; + mLength = length; + } + + /** + * If set, the new file will use this id instead of a generated one when the file is created. 
+ */ + private final long mContainerId; + private final long mLength; + private final List mBlockIds; + + /** + * @return the container id + */ + public long getContainerId() { + return mContainerId; + } + + /** + * @return the file length + */ + public long getLength() { + return mLength; + } + + /** + * @return the block ids in the file + */ + public List getBlockIds() { + return mBlockIds; + } + } private boolean mCacheable; + /** + * If set, the file will be mark as completed when it gets created in the inode tree. + * Used in metadata sync. + */ + @Nullable private CompleteFileInfo mCompleteFileInfo; + /** * Creates context with given option data. * @@ -34,6 +87,7 @@ public class CreateFileContext private CreateFileContext(CreateFilePOptions.Builder optionsBuilder) { super(optionsBuilder); mCacheable = false; + mCompleteFileInfo = null; } /** @@ -58,6 +112,15 @@ public static CreateFileContext mergeFrom(CreateFilePOptions.Builder optionsBuil return new CreateFileContext(mergedOptionsBuilder); } + /** + * Merges and creates a CreateFileContext. 
+ * @param optionsBuilder the options builder template + * @return the context + */ + public static CreateFileContext mergeFromDefault(CreateFilePOptions optionsBuilder) { + return new CreateFileContext(CreateFilePOptions.newBuilder().mergeFrom(optionsBuilder)); + } + /** * @return the instance of {@link CreateFileContext} with default values for master */ @@ -90,11 +153,33 @@ public OperationId getOperationId() { return super.getOperationId(); } + /** + * @param completeFileInfo if the file is expected to mark as completed when it is created + * @return the updated context object + */ + public CreateFileContext setCompleteFileInfo(CompleteFileInfo completeFileInfo) { + mCompleteFileInfo = completeFileInfo; + return getThis(); + } + + /** + * @return the complete file info object + */ + public CompleteFileInfo getCompleteFileInfo() { + return mCompleteFileInfo; + } + @Override public String toString() { - return MoreObjects.toStringHelper(this) + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this) .add("PathContext", super.toString()) - .add("Cacheable", mCacheable) - .toString(); + .add("Cacheable", mCacheable); + + if (mCompleteFileInfo != null) { + helper.add("Length", mCompleteFileInfo.getLength()) + .add("IsCompleted", true) + .add("BlockContainerId", mCompleteFileInfo.getContainerId()); + } + return helper.toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java index bf8719fcc8d3..730f6e050608 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/CreatePathContext.java @@ -11,6 +11,7 @@ package alluxio.master.file.contexts; +import alluxio.Constants; import alluxio.client.WriteType; import alluxio.conf.Configuration; import alluxio.grpc.CreateDirectoryPOptions; @@ -24,12 +25,14 @@ import 
alluxio.util.SecurityUtils; import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.protobuf.GeneratedMessageV3; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import javax.annotation.Nullable; /** @@ -48,9 +51,11 @@ public abstract class CreatePathContext, protected String mOwner; protected String mGroup; protected boolean mMetadataLoad; + protected boolean mPersistNonExistingParentDirectories = true; private WriteType mWriteType; protected Map mXAttr; protected XAttrPropagationStrategy mXAttrPropStrat; + @Nullable protected Supplier mMissingDirFingerprint = null; // // Values for the below fields will be extracted from given proto options @@ -64,6 +69,7 @@ public abstract class CreatePathContext, protected boolean mRecursive; protected long mTtl; protected TtlAction mTtlAction; + @Nullable protected String mFingerprint; /** * Creates context with given option data. 
@@ -78,6 +84,7 @@ protected CreatePathContext(T optionsBuilder) { mMetadataLoad = false; mGroup = ""; mOwner = ""; + mFingerprint = null; if (SecurityUtils.isAuthenticationEnabled(Configuration.global())) { mOwner = SecurityUtils.getOwnerFromGrpcClient(Configuration.global()); mGroup = SecurityUtils.getGroupFromGrpcClient(Configuration.global()); @@ -269,12 +276,18 @@ public String getGroup() { } /** - * @param metadataLoad the flag value to use; if true, the create path is a result of a metadata + * @param metadataLoad the flag value to use; if true, the created path is a result of a metadata * load + * @param persistNonExistingParentDirectories if true any non-existing parent directories + * will also be created on the UFS (this can only be + * set to false if metadataLoad is set to true) * @return the updated context */ - public K setMetadataLoad(boolean metadataLoad) { + public K setMetadataLoad( + boolean metadataLoad, boolean persistNonExistingParentDirectories) { + Preconditions.checkState(metadataLoad || persistNonExistingParentDirectories); mMetadataLoad = metadataLoad; + mPersistNonExistingParentDirectories = persistNonExistingParentDirectories; return getThis(); } @@ -295,6 +308,40 @@ public K setXAttr(@Nullable Map xattr) { return getThis(); } + /** + * @return the fingerprint + */ + @Nullable + public String getFingerprint() { + return mFingerprint; + } + + /** + * @param fingerprint the fingerprint + * @return the updated context + */ + public K setFingerprint(String fingerprint) { + mFingerprint = fingerprint; + return getThis(); + } + + /** + * @return the fingerprint for missing directories + */ + public String getMissingDirFingerprint() { + return mMissingDirFingerprint == null + ? 
Constants.INVALID_UFS_FINGERPRINT : mMissingDirFingerprint.get(); + } + + /** + * @param fingerprint the fingerprint to be used when creating missing nested directories + * @return the updated context + */ + public K setMissingDirFingerprint(Supplier fingerprint) { + mMissingDirFingerprint = fingerprint; + return getThis(); + } + /** * @return extended attributes propagation strategy of this context */ @@ -302,6 +349,14 @@ public XAttrPropagationStrategy getXAttrPropStrat() { return mXAttrPropStrat; } + /** + * @return true if non-existing parent directories should be persisted, + * can only be false if the metadataLoad flag is true + */ + public boolean isPersistNonExistingParentDirectories() { + return mPersistNonExistingParentDirectories; + } + /** * @return the metadataLoad flag; if true, the create path is a result of a metadata load */ @@ -320,6 +375,7 @@ public String toString() { .add("MetadataLoad", mMetadataLoad) .add("writeType", mWriteType) .add("xattr", mXAttr) + .add("Fingerprint", mFingerprint) .toString(); } } diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java index 3bb640bc7cd6..6129b26dbe45 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/DeleteContext.java @@ -23,6 +23,7 @@ */ public class DeleteContext extends OperationContext { private boolean mMetadataLoad = false; + private boolean mSkipNotPersisted = false; /** * Creates context with given option data. 
@@ -81,6 +82,22 @@ public DeleteContext setMetadataLoad(boolean metadataLoad) { return this; } + /** + * @param skipNotPersisted if true non-completed, or non-persisted files will be skipped + * @return the updated context + */ + public DeleteContext skipNotPersisted(boolean skipNotPersisted) { + mSkipNotPersisted = skipNotPersisted; + return this; + } + + /** + * @return true if the deletion should skip non-completed, or non-persisted files + */ + public boolean isSkipNotPersisted() { + return mSkipNotPersisted; + } + /** * @return the metadataLoad flag; if true, the operation is a result of a metadata load */ diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java index 19b7bc05fef9..9f633c6ecaa7 100644 --- a/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/ListStatusContext.java @@ -12,10 +12,13 @@ package alluxio.master.file.contexts; import alluxio.conf.Configuration; +import alluxio.grpc.FileSystemMasterCommonPOptions; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; +import alluxio.grpc.LoadMetadataPType; import alluxio.util.FileSystemOptionsUtils; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; import java.util.Optional; @@ -32,6 +35,7 @@ public class ListStatusContext private boolean mDoneListing = false; private long mTotalListings; private final ListStatusPartialPOptions.Builder mPartialPOptions; + private boolean mDisableMetadataSync = false; /** * @@ -41,6 +45,27 @@ public Optional getPartialOptions() { return Optional.ofNullable(mPartialPOptions); } + /** + * Set to true to disable metadata sync. 
+ * @return the context + */ + @VisibleForTesting + public ListStatusContext disableMetadataSync() { + mDisableMetadataSync = true; + getOptions().setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setSyncIntervalMs(-1).mergeFrom( + getOptions().getCommonOptions()).buildPartial()); + return this; + } + + /** + * @return true if metadata sync has been disabled for this operation + */ + public boolean isDisableMetadataSync() { + return mDisableMetadataSync; + } + /** * Creates context with given option data. * diff --git a/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java b/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java new file mode 100644 index 000000000000..398b680edaea --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/contexts/SyncMetadataContext.java @@ -0,0 +1,73 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.contexts; + +import alluxio.conf.Configuration; +import alluxio.grpc.ExistsPOptions; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.util.FileSystemOptionsUtils; + +import com.google.common.base.MoreObjects; + +/** + * Used to merge and wrap {@link SyncMetadataPOptions}. + */ +public class SyncMetadataContext + extends OperationContext { + + /** + * Creates context with given option data. 
+ * + * @param optionsBuilder options builder + */ + private SyncMetadataContext(SyncMetadataPOptions.Builder optionsBuilder) { + super(optionsBuilder); + } + + /** + * @param optionsBuilder Builder for proto {@link SyncMetadataPOptions} + * @return the instance of {@link SyncMetadataContext} with given options + */ + public static SyncMetadataContext create(SyncMetadataPOptions.Builder optionsBuilder) { + return new SyncMetadataContext(optionsBuilder); + } + + /** + * Merges and embeds the given {@link ExistsPOptions} with the corresponding master + * options. + * + * @param optionsBuilder Builder for proto {@link ExistsPOptions} to merge with defaults + * @return the instance of {@link SyncMetadataContext} with default values for master + */ + public static SyncMetadataContext mergeFrom(SyncMetadataPOptions.Builder optionsBuilder) { + SyncMetadataPOptions masterOptions = + FileSystemOptionsUtils.syncMetadataDefaults(Configuration.global()); + SyncMetadataPOptions.Builder mergedOptionsBuilder = + masterOptions.toBuilder().mergeFrom(optionsBuilder.build()); + return create(mergedOptionsBuilder); + } + + /** + * @return the instance of {@link SyncMetadataContext} with default values for master + */ + public static SyncMetadataContext defaults() { + return create(FileSystemOptionsUtils + .syncMetadataDefaults(Configuration.global()).toBuilder()); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("ProtoOptions", getOptions().build()) + .toString(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java new file mode 100644 index 000000000000..1006baf01126 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTask.java @@ -0,0 +1,314 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.DeadlineExceededRuntimeException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.status.CancelledException; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.SyncMetadataState; +import alluxio.grpc.SyncMetadataTask; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.journal.JournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.util.CommonUtils; +import alluxio.util.ExceptionUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; + +/** + * This is the overall task for a sync operation. 
+ */ +public abstract class BaseTask implements PathWaiter { + enum State { + RUNNING, + SUCCEEDED, + FAILED, + CANCELED; + + SyncMetadataState toProto() { + switch (this) { + case RUNNING: + return SyncMetadataState.RUNNING; + case SUCCEEDED: + return SyncMetadataState.SUCCEEDED; + case FAILED: + return SyncMetadataState.FAILED; + case CANCELED: + return SyncMetadataState.CANCELED; + default: + return SyncMetadataState.UNKNOWN; + } + } + } + + private static final Logger LOG = LoggerFactory.getLogger(BaseTask.class); + + private final long mStartTime; + private volatile Long mFinishTime = null; + BaseTaskResult mIsCompleted = null; + private final TaskInfo mTaskInfo; + private final PathLoaderTask mPathLoadTask; + private final boolean mRemoveOnComplete; + + /** + * @return the task state + */ + public synchronized State getState() { + if (!isCompleted().isPresent()) { + return State.RUNNING; + } + BaseTaskResult result = isCompleted().get(); + if (result.succeeded()) { + return State.SUCCEEDED; + } else if (result.getThrowable().orElse(null) instanceof CancelledException) { + return State.CANCELED; + } else { + return State.FAILED; + } + } + + /** + * @return true if the task is completed + */ + public synchronized Optional isCompleted() { + return Optional.ofNullable(mIsCompleted); + } + + /** + * @return if the task is succeeded + */ + public synchronized boolean succeeded() { + return mIsCompleted != null && mIsCompleted.succeeded(); + } + + @VisibleForTesting + PathLoaderTask getPathLoadTask() { + return mPathLoadTask; + } + + static BaseTask create( + TaskInfo info, long startTime, + Function> clientSupplier, + boolean removeOnComplete) { + if (info.getLoadByDirectory() != DirectoryLoadType.SINGLE_LISTING + && info.getDescendantType() == DescendantType.ALL) { + return new DirectoryPathWaiter( + info, startTime, clientSupplier, removeOnComplete); + } else { + return new BatchPathWaiter( + info, startTime, clientSupplier, removeOnComplete); + } + } + + static 
BaseTask create( + TaskInfo info, long startTime, + Function> clientSupplier) { + return create(info, startTime, clientSupplier, true); + } + + BaseTask( + TaskInfo info, long startTime, + Function> clientSupplier, boolean removeOnComplete) { + mTaskInfo = info; + mStartTime = startTime; + mPathLoadTask = new PathLoaderTask(mTaskInfo, null, clientSupplier); + mRemoveOnComplete = removeOnComplete; + } + + /** + * @return the task info + */ + public TaskInfo getTaskInfo() { + return mTaskInfo; + } + + /** + * @return true, if the task should be removed on completion, otherwise it will be + * moved to a completed task cache. + */ + boolean removeOnComplete() { + return mRemoveOnComplete; + } + + /** + * @return the sync task time in ms + */ + public synchronized long getStartTime() { + Preconditions.checkState(mIsCompleted != null, + "Task must be completed before accessing the start time"); + return mStartTime; + } + + PathLoaderTask getLoadTask() { + return mPathLoadTask; + } + + synchronized void onComplete( + boolean isFile, DefaultFileSystemMaster fileSystemMaster, InodeTree inodeTree) { + if (mIsCompleted != null) { + return; + } + updateDirectChildrenLoaded(fileSystemMaster, inodeTree); + mFinishTime = CommonUtils.getCurrentMs(); + mIsCompleted = new BaseTaskResult(null); + mTaskInfo.getMdSync().onTaskComplete(mTaskInfo.getId(), isFile); + notifyAll(); + } + + /** + * Blocking waits until the task completes. + * If the task fails, the exception causing the failure is thrown. + * If the wait times-out a {@link DeadlineExceededRuntimeException} is thrown. 
+ * + * @param timeoutMs the timeout in ms, 0 for an endless wait + */ + public synchronized void waitComplete(long timeoutMs) throws Throwable { + Stopwatch sw = Stopwatch.createStarted(); + long waitTime = timeoutMs; + while (mIsCompleted == null && (timeoutMs == 0 || waitTime > 0)) { + wait(waitTime); + if (timeoutMs != 0) { + waitTime = waitTime - sw.elapsed(TimeUnit.MILLISECONDS); + sw.reset(); + } + } + if (mIsCompleted == null) { + throw new DeadlineExceededRuntimeException("Task still running."); + } + if (mIsCompleted.getThrowable().isPresent()) { + throw mIsCompleted.getThrowable().get(); + } + } + + synchronized void onFailed(Throwable t) { + mFinishTime = CommonUtils.getCurrentMs(); + if (mIsCompleted != null) { + return; + } + mIsCompleted = new BaseTaskResult(t); + LOG.warn("Task {} failed with error", mTaskInfo, t); + cancel(); + mTaskInfo.getMdSync().onTaskError(mTaskInfo.getId(), t); + } + + synchronized long cancel() { + mFinishTime = CommonUtils.getCurrentMs(); + if (mIsCompleted == null) { + mIsCompleted = new BaseTaskResult(new CancelledException("Task was cancelled")); + } + mPathLoadTask.cancel(); + notifyAll(); + return mTaskInfo.getId(); + } + + boolean pathIsCovered(AlluxioURI path, DescendantType depth) { + switch (mTaskInfo.getDescendantType()) { + case NONE: + return depth == DescendantType.NONE && mTaskInfo.getBasePath().equals(path); + case ONE: + return (depth != DescendantType.ALL && mTaskInfo.getBasePath().equals(path)) + || (depth == DescendantType.NONE && mTaskInfo.getBasePath().equals(path.getParent())); + case ALL: + try { + return mTaskInfo.getBasePath().isAncestorOf(path); + } catch (InvalidPathException e) { + throw new InternalRuntimeException(e); + } + default: + throw new InternalRuntimeException(String.format( + "Unknown descendant type %s", mTaskInfo.getDescendantType())); + } + } + + /** + * @return the sync duration in ms + */ + public long getSyncDuration() { + final Long finishTime = mFinishTime; + if (finishTime == 
null) { + return CommonUtils.getCurrentMs() - mStartTime; + } + return mFinishTime - mStartTime; + } + + /** + * @return the sync metadata task in proto + */ + public synchronized SyncMetadataTask toProtoTask() { + SyncMetadataTask.Builder builder = SyncMetadataTask.newBuilder(); + builder.setId(getTaskInfo().getId()); + builder.setState(getState().toProto()); + builder.setSyncDurationMs(getSyncDuration()); + Throwable t = null; + if (mIsCompleted != null && mIsCompleted.getThrowable().isPresent()) { + t = mIsCompleted.getThrowable().get(); + } + if (t != null && getState() != State.CANCELED) { + builder.setException(SyncMetadataTask.Exception.newBuilder() + .setExceptionType(t.getClass().getTypeName()) + .setExceptionMessage(t.getMessage() == null ? "" : t.getMessage()) + .setStacktrace(ExceptionUtils.asPlainText(t))); + } + builder.setTaskInfoString(getTaskInfo().toString()); + Pair statReport = getTaskInfo().getStats().toReportString(); + builder.setSuccessOpCount(statReport.getFirst()); + builder.setTaskStatString(statReport.getSecond()); + return builder.build(); + } + + /** + * Updates direct children loaded for directories affected by the metadata sync. 
+ * @param fileSystemMaster the file system master + * @param inodeTree the inode tree + */ + public void updateDirectChildrenLoaded( + DefaultFileSystemMaster fileSystemMaster, InodeTree inodeTree) { + try (JournalContext journalContext = fileSystemMaster.createJournalContext()) { + getTaskInfo().getPathsToUpdateDirectChildrenLoaded().forEach( + uri -> { + try (LockedInodePath lockedInodePath = + inodeTree.lockInodePath( + uri, InodeTree.LockPattern.WRITE_INODE, + journalContext)) { + if (lockedInodePath.fullPathExists() && lockedInodePath.getInode().isDirectory() + && !lockedInodePath.getInode().asDirectory().isDirectChildrenLoaded()) { + inodeTree.setDirectChildrenLoaded( + () -> journalContext, + lockedInodePath.getInode().asDirectory()); + } + } catch (FileDoesNotExistException | InvalidPathException e) { + throw new RuntimeException(e); + } + }); + } catch (UnavailableException e) { + throw new RuntimeException(e); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java new file mode 100644 index 000000000000..3e5b32cb5f91 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BaseTaskResult.java @@ -0,0 +1,35 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * The overall result of a base task. 
+ */ +public class BaseTaskResult { + + private final Throwable mT; + + BaseTaskResult(@Nullable Throwable t) { + mT = t; + } + + boolean succeeded() { + return mT == null; + } + + Optional getThrowable() { + return Optional.ofNullable(mT); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java new file mode 100644 index 000000000000..00f285a57225 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/BatchPathWaiter.java @@ -0,0 +1,103 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.function.Function; + +class BatchPathWaiter extends BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(BatchPathWaiter.class); + private static final AlluxioURI EMPTY = new AlluxioURI(""); + + final List mLastCompleted; + final PathSequence mNoneCompleted; + + BatchPathWaiter( + TaskInfo info, long startTime, + Function> clientSupplier, + boolean removeOnComplete) { + super(info, startTime, clientSupplier, removeOnComplete); + mNoneCompleted = new PathSequence(EMPTY, info.getAlluxioPath()); + mLastCompleted = Lists.newArrayList(mNoneCompleted); + } + + @VisibleForTesting + List getLastCompleted() { + return mLastCompleted; + } + + @Override + public synchronized boolean waitForSync(AlluxioURI path) { + while (true) { + if (mIsCompleted != null) { + return mIsCompleted.succeeded(); + } + PathSequence minCompleted = mLastCompleted.get(0); + if (minCompleted != mNoneCompleted) { + if (minCompleted.getStart().compareTo(path) <= 0 + && minCompleted.getEnd().compareTo(path) > 0) { + return true; + } + } + try { + wait(); + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for synced path {}", path); + return false; + } + } + } + + @Override + public synchronized void nextCompleted(SyncProcessResult completed) { + if (!completed.getLoaded().isPresent()) { + return; + } + PathSequence loaded = completed.getLoaded().get(); + AlluxioURI newRight = null; + AlluxioURI newLeft = null; + int i = 0; + for (; i < mLastCompleted.size(); i++) { + int rightCmp = mLastCompleted.get(i).getStart().compareTo(loaded.getEnd()); + if (rightCmp == 0) { + newRight = mLastCompleted.get(i).getEnd(); + 
} + if (rightCmp >= 0) { + break; + } + int leftCmp = mLastCompleted.get(i).getEnd().compareTo(loaded.getStart()); + if (leftCmp == 0) { + newLeft = mLastCompleted.get(i).getStart(); + } + } + if (newRight == null && newLeft == null) { + mLastCompleted.add(i, loaded); + } else if (newRight != null && newLeft != null) { + mLastCompleted.set(i, new PathSequence(newLeft, newRight)); + mLastCompleted.remove(i - 1); + } else if (newLeft != null) { + mLastCompleted.set(i - 1, new PathSequence(newLeft, loaded.getEnd())); + } else { + mLastCompleted.set(i, new PathSequence(loaded.getStart(), newRight)); + } + notifyAll(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java new file mode 100644 index 000000000000..f117eaf6433d --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/DefaultSyncProcess.java @@ -0,0 +1,973 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.collections.Pair; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.ExceptionMessage; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.exception.runtime.NotFoundRuntimeException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.TtlAction; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.contexts.InternalOperationContext; +import alluxio.master.file.contexts.SetAttributeContext; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeFile; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.file.meta.LockingScheme; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.file.meta.UfsSyncUtils; +import alluxio.master.file.meta.options.MountInfo; +import alluxio.master.metastore.ReadOnlyInodeStore; +import 
alluxio.master.metastore.ReadOption; +import alluxio.master.metastore.SkippableInodeIterator; +import alluxio.resource.CloseableResource; +import alluxio.security.authorization.Mode; +import alluxio.underfs.Fingerprint; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsManager; +import alluxio.underfs.UfsStatus; +import alluxio.underfs.UnderFileSystem; +import alluxio.util.CommonUtils; +import alluxio.util.FileSystemOptionsUtils; +import alluxio.util.IteratorUtils; +import alluxio.util.io.PathUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.collect.Iterators; +import com.google.common.collect.PeekingIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + * The default metadata sync processor. + */ +public class DefaultSyncProcess implements SyncProcess { + /** + * the mount point not found runtime exception. + */ + public static class MountPointNotFoundRuntimeException extends NotFoundRuntimeException { + /** + * Creates the instance. 
+ * @param message the error message + */ + public MountPointNotFoundRuntimeException(String message) { + super(message); + } + } + + public static final FileSystemMasterCommonPOptions NO_TTL_OPTION = + FileSystemMasterCommonPOptions.newBuilder() + .setTtl(-1) + .setTtlAction( + Configuration.getEnum(PropertyKey.USER_FILE_CREATE_TTL_ACTION, TtlAction.class)) + .build(); + private static final Logger LOG = LoggerFactory.getLogger(DefaultSyncProcess.class); + private final DefaultFileSystemMaster mFsMaster; + private final ReadOnlyInodeStore mInodeStore; + private final MountTable mMountTable; + private final InodeTree mInodeTree; + + private final TaskTracker mTaskTracker; + private final MetadataSyncHandler mMetadataSyncHandler; + private final boolean mIgnoreTTL = + Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_IGNORE_TTL); + private final CreateFilePOptions mCreateFilePOptions = + FileSystemOptionsUtils.createFileDefaults(Configuration.global(), false).toBuilder().build(); + + private final Cache mTaskGroupMap = + CacheBuilder.newBuilder().maximumSize(1000).build(); + private final AtomicLong mTaskGroupIds = new AtomicLong(0); + + private final UfsAbsentPathCache mUfsAbsentCache; + + /** + * Constructs a default metadata sync processor. 
+ * + * @param fsMaster the file system master + * @param inodeStore the inode store + * @param mountTable the mount table + * @param inodeTree the inode tree + * @param syncPathCache the sync path cache + * @param absentPathCache the absent path cache + */ + public DefaultSyncProcess( + DefaultFileSystemMaster fsMaster, ReadOnlyInodeStore inodeStore, + MountTable mountTable, InodeTree inodeTree, + UfsSyncPathCache syncPathCache, UfsAbsentPathCache absentPathCache) { + mFsMaster = fsMaster; + mInodeStore = inodeStore; + mMountTable = mountTable; + mInodeTree = inodeTree; + mTaskTracker = new TaskTracker( + Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_EXECUTOR_POOL_SIZE), + Configuration.getInt(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_LOADS), + Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_GET_STATUS), + Configuration.getBoolean(PropertyKey.MASTER_METADATA_SYNC_UFS_CONCURRENT_LISTING), + syncPathCache, absentPathCache, this, this::getUfsClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, fsMaster, inodeTree); + mUfsAbsentCache = absentPathCache; + } + + private static String ufsPathToAlluxioPath(String ufsPath, String ufsMount, String alluxioMount) { + // first check if the ufsPath is the ufsMount path + if (ufsPath.length() < ufsMount.length() + && !ufsPath.endsWith(AlluxioURI.SEPARATOR)) { + Preconditions.checkState(ufsMount.equals(ufsPath + AlluxioURI.SEPARATOR)); + ufsPath = ufsMount; + } + // ufs path will be the full path (but will not include the bucket) + // e.g. 
nested/file or /nested/file + // ufsMount will include the ufs mount path without the bucket, eg /nested/ + // First remove the ufsMount from ufsPath, including the first / so that + // ufsPath does not start with / + if (ufsPath.startsWith(AlluxioURI.SEPARATOR)) { + ufsPath = ufsPath.substring(ufsMount.length()); + } else { + ufsPath = ufsPath.substring(ufsMount.length() - 1); + } + // now append the alluxio mount path to the ufs path + // the alluxio mount path will be something like /a/b/c + return alluxioMount + ufsPath; + } + + /** + * @param groupId the id of the task group + * @return the {@link TaskGroup} corresponding to the id + */ + public Optional getTaskGroup(long groupId) { + return Optional.ofNullable(mTaskGroupMap.getIfPresent(groupId)); + } + + /** + * Perform a metadata sync on the given path. Launches the task asynchronously. + * If descendent type is ALL, then a task is launched for each nested mount. + * + * @param alluxioPath the path to sync + * @param descendantType the depth of descendant to load + * @param directoryLoadType the type of listing to do on directories in the UFS + * @param syncInterval the sync interval to check if a sync is needed + * @param startAfter the start after mark where the sync starts + * @param isAsyncMetadataLoading if the sync is initiated by an async load metadata cli command + * @return the running task group + */ + public TaskGroup syncPath( + AlluxioURI alluxioPath, DescendantType descendantType, DirectoryLoadType directoryLoadType, + long syncInterval, @Nullable String startAfter, boolean isAsyncMetadataLoading) + throws InvalidPathException { + startAfter = stripPrefixIfPresent(alluxioPath, startAfter); + if (startAfter != null && descendantType == DescendantType.ALL + && directoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + throw new InvalidPathException( + "StartAfter param does not work with BFS/DFS directory load type"); + } + MountTable.Resolution resolution = mMountTable.resolve(alluxioPath); + 
Stream tasks = Stream.empty(); + long groupId = mTaskGroupIds.getAndIncrement(); + if (descendantType == DescendantType.ALL) { + List nestedMounts = mMountTable.findChildrenMountPoints(alluxioPath, false); + if (nestedMounts.size() > 0) { + if (startAfter != null) { + throw new InvalidPathException("StartAfter param does not work with nested mount"); + } + } + tasks = nestedMounts.stream().map(mountInfo -> + mTaskTracker.launchTaskAsync(mMetadataSyncHandler, mountInfo.getUfsUri(), + mountInfo.getAlluxioUri(), null, descendantType, + syncInterval, directoryLoadType, !isAsyncMetadataLoading)); + } + AlluxioURI ufsPath = resolution.getUri(); + TaskGroup group = new TaskGroup(groupId, + Stream.concat(Stream.of(mTaskTracker.launchTaskAsync( + mMetadataSyncHandler, ufsPath, alluxioPath, + startAfter, descendantType, syncInterval, directoryLoadType, + !isAsyncMetadataLoading)), tasks) + .toArray(BaseTask[]::new)); + mTaskGroupMap.put(groupId, group); + return group; + } + + /** + * Perform a metadata sync on the given path. Launches the task asynchronously. + * If descendant type is ALL, then a task is launched for each nested mount. 
+ * + * @param alluxioPath the path to sync + * @param descendantType the depth of descendants to load + * @param directoryLoadType the type of listing to do on directories in the UFS + * @param syncInterval the sync interval to check if a sync is needed + * @return the running task group + */ + public TaskGroup syncPath( + AlluxioURI alluxioPath, DescendantType descendantType, DirectoryLoadType directoryLoadType, + long syncInterval) throws InvalidPathException { + return syncPath(alluxioPath, descendantType, directoryLoadType, syncInterval, null, false); + } + + private CloseableResource getUfsClient(AlluxioURI ufsPath) { + CloseableResource ufsResource = + getClient(reverseResolve(ufsPath)).acquireUfsResource(); + return new CloseableResource(ufsResource.get()) { + @Override + public void closeResource() { + ufsResource.closeResource(); + } + }; + } + + private UfsManager.UfsClient getClient(MountTable.ReverseResolution reverseResolution) { + UfsManager.UfsClient ufsClient = mMountTable.getUfsClient( + reverseResolution.getMountInfo().getMountId()); + if (ufsClient == null) { + throw new NotFoundRuntimeException(String.format("Mount not found for UFS path %s", + reverseResolution.getMountInfo().getUfsUri())); + } + return ufsClient; + } + + private MountTable.ReverseResolution reverseResolve( + AlluxioURI ufsPath) throws MountPointNotFoundRuntimeException { + MountTable.ReverseResolution reverseResolution = mMountTable.reverseResolve( + ufsPath); + if (reverseResolution == null) { + throw new MountPointNotFoundRuntimeException(String.format("Mount not found for UFS path %s", + ufsPath)); + } + return reverseResolution; + } + + @Override + public SyncProcessResult performSync( + LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable { + try (SyncProcessContext context = + SyncProcessContext.Builder.builder( + mFsMaster.createNonMergingJournalRpcContext( + new InternalOperationContext()), loadResult).build()) { + MountTable.ReverseResolution 
reverseResolution + = reverseResolve(loadResult.getBaseLoadPath()); + try (CloseableResource ufsResource = + getClient(reverseResolution).acquireUfsResource()) { + UnderFileSystem ufs = ufsResource.get(); + final MountInfo mountInfo = reverseResolution.getMountInfo(); + + // this is the full mount, eg S3://bucket/dir + AlluxioURI ufsMountURI = reverseResolution.getMountInfo().getUfsUri(); + // this is the base of the mount, eg s3://bucket/ + AlluxioURI ufsMountBaseUri = new AlluxioURI(ufsMountURI.getRootPath()); + // and without the s3://bucket, e.g. the above would be /dir + / + final String ufsMountPath = PathUtils.normalizePath( + ufsMountURI.getPath(), AlluxioURI.SEPARATOR); + // the loaded and normalized ufs path without the bucket, e.g. /dir/ + final String baseLoadPath = PathUtils.normalizePath(loadResult.getBaseLoadPath().getPath(), + AlluxioURI.SEPARATOR); + + // the mounted path in alluxio, eg /mount + AlluxioURI alluxioMountUri = reverseResolution.getMountInfo().getAlluxioUri(); + final String alluxioMountPath = PathUtils.normalizePath( + alluxioMountUri.getPath(), AlluxioURI.SEPARATOR); + // the Alluxio path that was loaded from the UFS + AlluxioURI alluxioSyncPath = reverseResolution.getUri(); + // the completed path sequence is from the previous load's + // last sync path, until our last UFS item + AlluxioURI syncStart = new AlluxioURI(ufsPathToAlluxioPath(loadResult.getPreviousLast() + .orElse(loadResult.getBaseLoadPath()).getPath(), ufsMountPath, alluxioMountPath)); + LOG.debug("Syncing from {}, load batch id {}, load id {}", syncStart, + loadResult.getLoadRequest().getBatchSetId(), + loadResult.getLoadRequest().getLoadRequestId()); + Stream stream = loadResult.getUfsLoadResult().getItems().map(status -> { + UfsItem item = new UfsItem(status, ufsMountPath, alluxioMountPath); + try { + // If we are loading by directory, then we must create a new load task on each + // directory traversed + if (loadResult.getTaskInfo().hasDirLoadTasks() && 
status.isDirectory() + && !item.mAlluxioUri.isAncestorOf(loadResult.getTaskInfo().getAlluxioPath(), false) + && !(baseLoadPath.equals( + PathUtils.normalizePathStart(status.getName(), AlluxioURI.SEPARATOR)))) { + // first check if the directory needs to be synced + if (syncPathCache.shouldSyncPath(item.mAlluxioUri, + loadResult.getTaskInfo().getSyncInterval(), + loadResult.getTaskInfo().getDescendantType()).isShouldSync()) { + AlluxioURI childDirectoryPath = ufsMountBaseUri.join(status.getName()); + MountTable.ReverseResolution childDirectoryReverseResolution = + mMountTable.reverseResolve(childDirectoryPath); + Preconditions.checkNotNull(childDirectoryReverseResolution); + MountTable.Resolution childDirectoryResolution = + mMountTable.resolve(childDirectoryReverseResolution.getUri()); + if (childDirectoryReverseResolution.getMountInfo().getMountId() + == childDirectoryResolution.getMountId()) { + loadResult.getTaskInfo().getMdSync() + .loadNestedDirectory(loadResult.getTaskInfo().getId(), + ufsMountBaseUri.join(status.getName())); + } else { + LOG.warn("Sync of path {} is skipped as the directory is a mount point. " + + "Mount point {}, conflict mount point {}", reverseResolution.getUri(), + childDirectoryReverseResolution.getMountInfo().getUfsUri(), + childDirectoryResolution.getUfsMountPointUri()); + } + } + } + } catch (Exception e) { + throw new InvalidArgumentRuntimeException(e); + } + return item; + }); + + PeekingIterator ufsIterator = Iterators.peekingIterator(stream.iterator()); + // Check if the root of the path being synced is a file + UfsItem firstItem = ufsIterator.hasNext() ? ufsIterator.peek() : null; + boolean baseSyncPathIsFile = firstItem != null && firstItem.mUfsItem.isFile() + && PathUtils.normalizePathStart(firstItem.mUfsItem.getName(), AlluxioURI.SEPARATOR) + .equals(loadResult.getBaseLoadPath().getPath()); + + LOG.debug("Processing sync from {}", firstItem == null ? 
"" : firstItem.mAlluxioPath); + // this variable will keep the last UfsStatus returned + UfsItem lastUfsStatus; + ReadOption.Builder readOptionBuilder = ReadOption.newBuilder(); + // we start iterating the Alluxio metadata from the end of the + // previous load batch, or if this is the first load then the base + // load path + AlluxioURI readFrom = new AlluxioURI(ufsPathToAlluxioPath( + loadResult.getPreviousLast().map(AlluxioURI::getPath).orElse( + baseLoadPath), ufsMountPath, alluxioMountPath)); + // we skip the initial inode if this is not the initial listing, as this + // inode was processed in the previous listing + boolean skipInitialReadFrom = loadResult.getPreviousLast().isPresent(); + Preconditions.checkState(readFrom.getPath().startsWith(alluxioMountUri.getPath())); + loadResult.getPreviousLast().ifPresent(prevLast -> { + String prevLastAlluxio = ufsPathToAlluxioPath( + prevLast.getPath(), ufsMountPath, alluxioMountPath); + String readFromSubstring = prevLastAlluxio.substring( + alluxioSyncPath.getPath().endsWith(AlluxioURI.SEPARATOR) + ? alluxioSyncPath.getPath().length() : alluxioSyncPath.getPath().length() + 1); + readOptionBuilder.setReadFrom(readFromSubstring); + }); + // We stop iterating the Alluxio metadata at the last loaded item if the load result + // is truncated + AlluxioURI readUntil = null; + if (loadResult.getUfsLoadResult().isTruncated() + && loadResult.getUfsLoadResult().getLastItem().isPresent()) { + readUntil = new AlluxioURI(ufsPathToAlluxioPath( + loadResult.getUfsLoadResult().getLastItem().get().getPath(), + ufsMountPath, alluxioMountPath)); + } + + // Take the root of the sync path as a write_edge (unless it is the mount path + // as in this case we will not modify the node), once we traverse + // past this node, we will downgrade it to a read lock in + // SyncProcessState.getNextInode + InodeTree.LockPattern rootLockPattern = alluxioSyncPath.equals(alluxioMountUri) + ? 
InodeTree.LockPattern.READ : InodeTree.LockPattern.WRITE_EDGE; + LockingScheme lockingScheme = new LockingScheme(alluxioSyncPath, + rootLockPattern, false); + try (LockedInodePath lockedInodePath = + mInodeTree.lockInodePath( + lockingScheme, context.getRpcContext().getJournalContext())) { + // after taking the lock on the root path, + // we must verify the mount is still valid + String ufsMountUriString = PathUtils.normalizePath(ufsMountPath, AlluxioURI.SEPARATOR); + String ufsMountUriStringAfterTakingLock = + PathUtils.normalizePath(mMountTable.resolve(alluxioSyncPath) + .getUfsMountPointUri().getPath(), AlluxioURI.SEPARATOR); + if (!ufsMountUriString.equals(ufsMountUriStringAfterTakingLock)) { + NotFoundRuntimeException ex = new NotFoundRuntimeException(String.format( + "Mount path %s no longer exists during sync of %s", + ufsMountURI, alluxioSyncPath)); + handleConcurrentModification(context, alluxioSyncPath.getPath(), true, ex); + throw ex; + } + boolean containsNestedMount = context.getDescendantType() != DescendantType.NONE + && mMountTable.findChildrenMountPoints(alluxioSyncPath, false).size() > 0; + // Get the inode of the sync start + try (SkippableInodeIterator inodeIterator = mInodeStore.getSkippableChildrenIterator( + readOptionBuilder.build(), context.getDescendantType(), loadResult.isFirstLoad(), + lockedInodePath)) { + SyncProcessState syncState = new SyncProcessState(alluxioMountPath, + alluxioSyncPath, lockedInodePath, loadResult.isFirstLoad(), + readFrom, skipInitialReadFrom, readUntil, + context, inodeIterator, ufsIterator, mountInfo, ufs, containsNestedMount); + lastUfsStatus = updateMetadataSync(syncState); + } + if (lockedInodePath.fullPathExists() && lockedInodePath.getInode().isDirectory() + && !lockedInodePath.getInode().asDirectory().isDirectChildrenLoaded()) { + // check if the root sync path should have its children marked as loaded + context.addDirectoriesToUpdateIsChildrenLoaded(lockedInodePath.getUri()); + } + } + 
context.updateAbsentCache(mUfsAbsentCache); + AlluxioURI syncEnd = lastUfsStatus == null ? syncStart + : lastUfsStatus.mAlluxioUri; + PathSequence pathSequence = new PathSequence(syncStart, syncEnd); + LOG.debug("Completed processing sync from {} until {}", syncStart, syncEnd); + return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(), + pathSequence, loadResult.getUfsLoadResult().isTruncated(), + baseSyncPathIsFile); + } + } + } + + private UfsItem updateMetadataSync(SyncProcessState syncState) + throws IOException, FileDoesNotExistException, FileAlreadyExistsException, BlockInfoException, + AccessControlException, DirectoryNotEmptyException, InvalidPathException { + InodeIterationResult currentInode = syncState.getNextInode(); + if (currentInode != null && currentInode.getLockedPath().getUri().equals( + syncState.mMountInfo.getAlluxioUri())) { + // skip the inode of the mount path + currentInode = syncState.getNextInode(); + } + // We don't want to include the inode that we are reading from, so skip until we are sure + // we are passed that + while (syncState.mUfsStatusIterator.hasNext() && currentInode != null + && ((syncState.mSkipInitialReadFrom + && syncState.mReadFrom.compareTo(currentInode.getLockedPath().getUri()) >= 0) + || (!syncState.mSkipInitialReadFrom + && syncState.mReadFrom.compareTo(currentInode.getLockedPath().getUri()) > 0))) { + currentInode = syncState.getNextInode(); + } + UfsItem currentUfsStatus = IteratorUtils.nextOrNull( + syncState.mUfsStatusIterator); + // skip the initial mount path of the UFS status + // as well as the base sync path if this is not our first load task + if (currentUfsStatus != null + && (currentUfsStatus.mAlluxioPath.equals(syncState.mAlluxioMountPath) + || (!syncState.mIsFirstLoad + && currentUfsStatus.mAlluxioUri.equals(syncState.mAlluxioSyncPath)))) { + currentUfsStatus = IteratorUtils.nextOrNull( + syncState.mUfsStatusIterator); + } + UfsItem lastUfsStatus = currentUfsStatus; + + // 
Case A. Alluxio /foo and UFS /bar + // 1. WRITE_LOCK lock /bar + // 2. create /bar + // 3. unlock /bar + // 4. move UFS pointer + // Case B. Alluxio /bar and UFS /foo + // 1. WRITE_LOCK lock /bar + // 2. delete /bar RECURSIVELY (call fs master) + // 3. unlock /bar + // 4. move Alluxio pointer and SKIP the children of /foo + // Case C. Alluxio /foo and Alluxio /foo + // 1. compare the fingerprint + // 2. WRITE_LOCK /foo + // 3. update the metadata + // 4. unlock /foo + // 5. move two pointers + while (currentInode != null || currentUfsStatus != null) { + SingleInodeSyncResult result = performSyncOne(syncState, currentUfsStatus, currentInode); + if (result.mSkipChildren) { + syncState.mInodeIterator.skipChildrenOfTheCurrent(); + } + if (result.mMoveInode) { + currentInode = syncState.getNextInode(); + } + if (result.mMoveUfs) { + currentUfsStatus = IteratorUtils.nextOrNull(syncState.mUfsStatusIterator); + lastUfsStatus = currentUfsStatus == null ? lastUfsStatus : currentUfsStatus; + } + } + Preconditions.checkState(!syncState.mUfsStatusIterator.hasNext()); + return lastUfsStatus; + } + + private void checkShouldSetDescendantsLoaded(Inode inode, SyncProcessState syncState) + throws FileDoesNotExistException, InvalidPathException { + // Mark directories as having their children loaded based on the sync descendent type + if (syncState.mContext.getDescendantType() != DescendantType.NONE) { + if (inode.isDirectory() && !inode.asDirectory().isDirectChildrenLoaded()) { + AlluxioURI inodePath = mInodeTree.getPath(inode.getId()); + // The children have been loaded if + // (1) The descendant type is ALL and the inode is contained in the sync path + // (2) The descendant type is ONE and the inode is the synced path + if ((syncState.mContext.getDescendantType() == DescendantType.ALL + && syncState.mAlluxioSyncPath.isAncestorOf(inodePath, false)) + || (syncState.mContext.getDescendantType() == DescendantType.ONE + && syncState.mAlluxioSyncPath.equals(inodePath))) { + 
syncState.mContext.addDirectoriesToUpdateIsChildrenLoaded(inodePath); + } + } + } + } + + protected SingleInodeSyncResult performSyncOne( + SyncProcessState syncState, + @Nullable UfsItem currentUfsStatus, + @Nullable InodeIterationResult currentInode) + throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + IOException, BlockInfoException, DirectoryNotEmptyException, AccessControlException { + Optional comparisonResult = currentInode != null && currentUfsStatus != null + ? Optional.of( + currentInode.getLockedPath().getUri().compareTo(currentUfsStatus.mAlluxioUri)) : + Optional.empty(); + if (currentInode == null || (comparisonResult.isPresent() && comparisonResult.get() > 0)) { + // (Case 1) - in this case the UFS item is missing in the inode tree, so we create it + // comparisonResult is present implies that currentUfsStatus is not null + assert currentUfsStatus != null; + try (LockedInodePath lockedInodePath = syncState.mAlluxioSyncPathLocked.lockDescendant( + currentUfsStatus.mAlluxioUri, InodeTree.LockPattern.WRITE_EDGE)) { + // If the current mount point contains nested mount point, + // we need to do extra check to prevent files shadowed by mount points. 
+ if (syncState.mContainsNestedMount) { + if (mMountTable.resolve(lockedInodePath.getUri()).getMountId() + != syncState.mMountInfo.getMountId()) { + // The file to create is shadowed by a nested mount + syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_ON_MOUNT_POINT); + return new SingleInodeSyncResult(true, false, false); + } + } + List createdInodes; + if (currentUfsStatus.mUfsItem.isDirectory()) { + createdInodes = createInodeDirectoryMetadata(syncState.mContext, lockedInodePath, + currentUfsStatus.mUfsItem, syncState); + } else { + createdInodes = createInodeFileMetadata(syncState.mContext, lockedInodePath, + currentUfsStatus.mUfsItem, syncState); + } + if (syncState.mContext.getDescendantType() != DescendantType.NONE) { + // Mark directories as having their children loaded based on the sync descendant type + for (Inode next : createdInodes) { + checkShouldSetDescendantsLoaded(next, syncState); + } + } + syncState.mContext.reportSyncOperationSuccess(SyncOperation.CREATE, createdInodes.size()); + } catch (FileAlreadyExistsException e) { + handleConcurrentModification( + syncState.mContext, currentUfsStatus.mAlluxioPath, false, e); + } + return new SingleInodeSyncResult(true, false, false); + } else if (currentUfsStatus == null || comparisonResult.get() < 0) { + if (currentInode.getInode().isDirectory() && currentUfsStatus != null + && currentInode.getLockedPath().getUri().isAncestorOf( + currentUfsStatus.mAlluxioUri, false)) { + // (Case 2) - in this case the inode is a directory and is an ancestor of the current + // UFS state, so we skip it + checkShouldSetDescendantsLoaded(currentInode.getInode(), syncState); + return new SingleInodeSyncResult(false, true, false); + } + // (Case 3) - in this case the inode is not in the UFS, so we must delete it + // unless the file is being persisted, or is not complete + try { + LockedInodePath path = currentInode.getLockedPath(); + path.traverse(); + AlluxioURI uri = 
currentInode.getLockedPath().getUri(); + // skip if this is a mount point, or it belongs to a nested mount point + if (mMountTable.isMountPoint(uri) + || (syncState.mContainsNestedMount && mMountTable.resolve(uri).getMountId() + != syncState.mMountInfo.getMountId())) { + // the mount point will be synced through another sync task if + // descendant type is ALL. + return new SingleInodeSyncResult(false, true, true); + } + Pair deletedInodes = deletePath(syncState.mContext, path, true); + if (deletedInodes.getFirst() > 0) { + syncState.mContext.reportSyncOperationSuccess(SyncOperation.DELETE, + deletedInodes.getFirst()); + } + if (deletedInodes.getSecond() > 0) { + syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_NON_PERSISTED, + deletedInodes.getSecond()); + } + } catch (FileDoesNotExistException e) { + handleConcurrentModification( + syncState.mContext, currentInode.getLockedPath().getUri().getPath(), false, e); + } + return new SingleInodeSyncResult(false, true, true); + } + // (Case 4) - in this case both the inode, and the UFS item exist, so we check if we need + // to update the metadata + LockedInodePath lockedInodePath = currentInode.getLockedPath(); + lockedInodePath.traverse(); + // skip if this is a mount point + if (mMountTable.isMountPoint(currentInode.getLockedPath().getUri())) { + syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_ON_MOUNT_POINT, 1); + return new SingleInodeSyncResult(true, true, true); + } + // skip if the file is not complete or not persisted + if (lockedInodePath.getInode().isFile()) { + InodeFile inodeFile = lockedInodePath.getInodeFile(); + if (!inodeFile.isCompleted() || !inodeFile.isPersisted()) { + syncState.mContext.reportSyncOperationSuccess(SyncOperation.SKIPPED_NON_PERSISTED, 1); + return new SingleInodeSyncResult(true, true, false); + } + } + // HDFS also fetches ACL list, which is ignored for now + String ufsType = syncState.mUfs.getUnderFSType(); + Fingerprint ufsFingerprint = 
Fingerprint.create(ufsType, currentUfsStatus.mUfsItem); + boolean containsMountPoint = mMountTable.containsMountPoint( + currentInode.getLockedPath().getUri(), true, false); + UfsSyncUtils.SyncPlan syncPlan = + UfsSyncUtils.computeSyncPlan(currentInode.getInode(), ufsFingerprint, containsMountPoint); + if (syncPlan.toUpdateMetaData() || syncPlan.toDelete() || syncPlan.toLoadMetadata()) { + try { + if (syncPlan.toUpdateMetaData()) { + updateInodeMetadata(syncState.mContext, lockedInodePath, currentUfsStatus.mUfsItem, + ufsFingerprint); + syncState.mContext.reportSyncOperationSuccess(SyncOperation.UPDATE); + } else if (syncPlan.toDelete() && syncPlan.toLoadMetadata()) { + if (lockedInodePath.getInode().isDirectory()) { + throw new InternalRuntimeException( + String.format("Deleting directory %s in metadata sync due to metadata change", + lockedInodePath.getUri())); + } + deletePath(syncState.mContext, lockedInodePath, false); + lockedInodePath.removeLastInode(); + try (LockedInodePath newLockedInodePath = mInodeTree.lockInodePath( + lockedInodePath.getUri(), InodeTree.LockPattern.WRITE_EDGE, + syncState.mContext.getMetadataSyncJournalContext())) { + if (currentUfsStatus.mUfsItem.isDirectory()) { + createInodeDirectoryMetadata(syncState.mContext, newLockedInodePath, + currentUfsStatus.mUfsItem, syncState); + } else { + createInodeFileMetadata(syncState.mContext, newLockedInodePath, + currentUfsStatus.mUfsItem, syncState); + } + } + syncState.mContext.reportSyncOperationSuccess(SyncOperation.RECREATE); + } else { + throw new IllegalStateException("We should never reach here."); + } + } catch (FileDoesNotExistException | FileAlreadyExistsException e) { + handleConcurrentModification( + syncState.mContext, currentInode.getLockedPath().getUri().getPath(), false, e); + } + } else { + syncState.mContext.reportSyncOperationSuccess(SyncOperation.NOOP); + } + checkShouldSetDescendantsLoaded(currentInode.getInode(), syncState); + return new SingleInodeSyncResult(true, true, 
false); + } + + private void handleConcurrentModification( + SyncProcessContext context, String path, boolean isRoot, Exception e) + throws FileAlreadyExistsException, FileDoesNotExistException { + String loggingMessage = "Sync metadata failed on [{}] due to concurrent modification."; + if (!isRoot && context.isConcurrentModificationAllowed()) { + context.reportSyncOperationSuccess(SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION); + LOG.info(loggingMessage, path, e); + } else { + context.reportSyncFailReason(SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC, e); + LOG.error(loggingMessage, path, e); + if (e instanceof FileAlreadyExistsException) { + throw (FileAlreadyExistsException) e; + } + if (e instanceof FileDoesNotExistException) { + throw (FileDoesNotExistException) e; + } + throw new RuntimeException(e); + } + } + + private Pair deletePath( + SyncProcessContext context, LockedInodePath lockedInodePath, boolean skipNonPersisted) + throws FileDoesNotExistException, DirectoryNotEmptyException, IOException, + InvalidPathException { + DeleteContext syncDeleteContext = DeleteContext.mergeFrom( + DeletePOptions.newBuilder() + .setRecursive(true) + .setAlluxioOnly(true) + .setUnchecked(true)) + .skipNotPersisted(skipNonPersisted) + .setMetadataLoad(true); + Pair deletedInodes = mFsMaster.deleteInternal(context.getRpcContext(), + lockedInodePath, syncDeleteContext, true); + if (deletedInodes.getFirst() == 0 && deletedInodes.getSecond() == 0) { + throw new FileDoesNotExistException(lockedInodePath + " does not exist."); + } + return deletedInodes; + } + + private void updateInodeMetadata( + SyncProcessContext context, LockedInodePath lockedInodePath, + UfsStatus ufsStatus, Fingerprint fingerprint) + throws FileDoesNotExistException, AccessControlException, InvalidPathException { + // UpdateMetadata is used when a file or a directory only had metadata change. + // It works by calling SetAttributeInternal on the inodePath. 
+ short mode = ufsStatus.getMode(); + SetAttributePOptions.Builder builder = SetAttributePOptions.newBuilder() + .setMode(new Mode(mode).toProto()); + if (!ufsStatus.getOwner().equals("")) { + builder.setOwner(ufsStatus.getOwner()); + } + if (!ufsStatus.getGroup().equals("")) { + // Fix: this previously called setOwner, overwriting the owner with the group + // name and leaving the group unset; the group branch must call setGroup. + builder.setGroup(ufsStatus.getGroup()); + } + SetAttributeContext ctx = SetAttributeContext.mergeFrom(builder) + .setUfsFingerprint(fingerprint.serialize()) + .setMetadataLoad(true); + mFsMaster.setAttributeSingleFile(context.getRpcContext(), lockedInodePath, false, + CommonUtils.getCurrentMs(), ctx); + } + + private List createInodeFileMetadata( + SyncProcessContext context, LockedInodePath lockedInodePath, + UfsStatus ufsStatus, SyncProcessState syncState + ) throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + BlockInfoException, IOException { + long blockSize = ((UfsFileStatus) ufsStatus).getBlockSize(); + if (blockSize == UfsFileStatus.UNKNOWN_BLOCK_SIZE) { + throw new RuntimeException("Unknown block size"); + } + + // Metadata loaded from UFS has no TTL set. + CreateFileContext createFileContext = CreateFileContext.mergeFromDefault(mCreateFilePOptions); + createFileContext.getOptions().setBlockSizeBytes(blockSize); + // Ancestor should be created before unless it is the sync root + createFileContext.getOptions().setRecursive(true); + FileSystemMasterCommonPOptions commonPOptions = + mIgnoreTTL ? 
NO_TTL_OPTION : context.getCommonOptions(); + createFileContext.getOptions() + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(commonPOptions.getTtl()) + .setTtlAction(commonPOptions.getTtlAction())); + Fingerprint fingerprint = Fingerprint.create(syncState.mUfs.getUnderFSType(), ufsStatus); + createFileContext.setMissingDirFingerprint(() -> + Fingerprint.create(syncState.mUfs.getUnderFSType(), + new UfsDirectoryStatus( + ufsStatus.getName(), ufsStatus.getOwner(), + ufsStatus.getGroup(), ufsStatus.getMode())).serialize()); + createFileContext.setFingerprint(fingerprint.serialize()); + + createFileContext.setWriteType(WriteType.THROUGH); // set as through since already in UFS + createFileContext.setMetadataLoad(true, false); + createFileContext.setOwner(ufsStatus.getOwner()); + createFileContext.setGroup(ufsStatus.getGroup()); + createFileContext.setXAttr(ufsStatus.getXAttr()); + short ufsMode = ufsStatus.getMode(); + Mode mode = new Mode(ufsMode); + Long ufsLastModified = ufsStatus.getLastModifiedTime(); + if (syncState.mMountInfo.getOptions().getShared()) { + mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits())); + } + createFileContext.getOptions().setMode(mode.toProto()); + // NO ACL for now + if (ufsLastModified != null) { + createFileContext.setOperationTimeMs(ufsLastModified); + } + List result = mFsMaster.createCompleteFileInternalForMetadataSync( + context.getRpcContext(), lockedInodePath, createFileContext, (UfsFileStatus) ufsStatus); + context.addDirectoriesToUpdateAbsentCache(lockedInodePath.getUri().getParent()); + return result; + } + + private List createInodeDirectoryMetadata( + SyncProcessContext context, LockedInodePath lockedInodePath, + UfsStatus ufsStatus, SyncProcessState syncState + ) throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + IOException { + MountTable.Resolution resolution = mMountTable.resolve(lockedInodePath.getUri()); + boolean isMountPoint = 
mMountTable.isMountPoint(lockedInodePath.getUri()); + + CreateDirectoryContext createDirectoryContext = CreateDirectoryContext.defaults(); + createDirectoryContext.getOptions() + .setRecursive(true) + .setAllowExists(false) + .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(context.getCommonOptions().getTtl()) + .setTtlAction(context.getCommonOptions().getTtlAction())); + createDirectoryContext.setMountPoint(isMountPoint); + createDirectoryContext.setMetadataLoad(true, false); + createDirectoryContext.setWriteType(WriteType.THROUGH); + String dirFingerprint = Fingerprint.create( + syncState.mUfs.getUnderFSType(), ufsStatus).serialize(); + createDirectoryContext.setMissingDirFingerprint(() -> dirFingerprint); + createDirectoryContext.setFingerprint(dirFingerprint); + + String ufsOwner = ufsStatus.getOwner(); + String ufsGroup = ufsStatus.getGroup(); + short ufsMode = ufsStatus.getMode(); + Long lastModifiedTime = ufsStatus.getLastModifiedTime(); + Mode mode = new Mode(ufsMode); + if (resolution.getShared()) { + mode.setOtherBits(mode.getOtherBits().or(mode.getOwnerBits())); + } + createDirectoryContext.getOptions().setMode(mode.toProto()); + createDirectoryContext + .setOwner(ufsOwner) + .setGroup(ufsGroup) + .setUfsStatus(ufsStatus); + createDirectoryContext.setXAttr(ufsStatus.getXAttr()); + + if (lastModifiedTime != null) { + createDirectoryContext.setOperationTimeMs(lastModifiedTime); + } + return mFsMaster.createDirectoryInternal( + context.getRpcContext(), + lockedInodePath, + resolution.getUfsClient(), + resolution.getUri(), + createDirectoryContext + ); + } + + /** + * @return the task tracker + */ + public TaskTracker getTaskTracker() { + return mTaskTracker; + } + + static final class UfsItem { + final UfsStatus mUfsItem; + final String mAlluxioPath; + final AlluxioURI mAlluxioUri; + + UfsItem(UfsStatus ufsStatus, String ufsMount, String alluxioMount) { + mAlluxioPath = ufsPathToAlluxioPath(ufsStatus.getName(), ufsMount, 
alluxioMount); + mAlluxioUri = new AlluxioURI(mAlluxioPath); + mUfsItem = ufsStatus; + } + } + + @VisibleForTesting + static final class SyncProcessState { + final String mAlluxioMountPath; + final AlluxioURI mAlluxioSyncPath; + final LockedInodePath mAlluxioSyncPathLocked; + final AlluxioURI mReadFrom; + final boolean mSkipInitialReadFrom; + final AlluxioURI mReadUntil; + final SyncProcessContext mContext; + final SkippableInodeIterator mInodeIterator; + final Iterator mUfsStatusIterator; + final MountInfo mMountInfo; + final UnderFileSystem mUfs; + final boolean mIsFirstLoad; + final boolean mContainsNestedMount; + boolean mTraversedRootPath = false; + boolean mDowngradedRootPath = false; + + SyncProcessState( + String alluxioMountPath, + AlluxioURI alluxioSyncPath, + LockedInodePath alluxioSyncPathLocked, + boolean isFirstLoad, + AlluxioURI readFrom, boolean skipInitialReadFrom, + @Nullable AlluxioURI readUntil, + SyncProcessContext context, + SkippableInodeIterator inodeIterator, + Iterator ufsStatusIterator, + MountInfo mountInfo, UnderFileSystem underFileSystem, + boolean containsNestedMount) { + mAlluxioMountPath = alluxioMountPath; + mAlluxioSyncPath = alluxioSyncPath; + mAlluxioSyncPathLocked = alluxioSyncPathLocked; + mIsFirstLoad = isFirstLoad; + mReadFrom = readFrom; + mSkipInitialReadFrom = skipInitialReadFrom; + mReadUntil = readUntil; + mContext = context; + mInodeIterator = inodeIterator; + mUfsStatusIterator = ufsStatusIterator; + mMountInfo = mountInfo; + mUfs = underFileSystem; + mContainsNestedMount = containsNestedMount; + } + + private void downgradeRootPath() { + // once we have traversed the root sync path we downgrade it to a read lock + mAlluxioSyncPathLocked.downgradeToRead(); + mDowngradedRootPath = true; + } + + @Nullable + InodeIterationResult getNextInode() throws InvalidPathException { + if (mTraversedRootPath && !mDowngradedRootPath) { + downgradeRootPath(); + } + mTraversedRootPath = true; + InodeIterationResult next = 
IteratorUtils.nextOrNull(mInodeIterator); + if (next != null) { + if (!mAlluxioSyncPath.isAncestorOf(next.getLockedPath().getUri(), false)) { + downgradeRootPath(); + return null; + } + if (mReadUntil != null) { + if (next.getLockedPath().getUri().compareTo(mReadUntil) > 0) { + downgradeRootPath(); + return null; + } + } + } + return next; + } + } + + protected static class SingleInodeSyncResult { + boolean mMoveUfs; + boolean mMoveInode; + boolean mSkipChildren; + + public SingleInodeSyncResult(boolean moveUfs, boolean moveInode, boolean skipChildren) { + mMoveUfs = moveUfs; + mMoveInode = moveInode; + mSkipChildren = skipChildren; + } + } + + private String stripPrefixIfPresent(AlluxioURI syncRoot, @Nullable String startAfter) + throws InvalidPathException { + if (startAfter == null || !startAfter.startsWith(AlluxioURI.SEPARATOR)) { + return startAfter; + } + // this path starts from the root, so we must remove the prefix + String startAfterCheck = startAfter.substring(0, + Math.min(syncRoot.getPath().length(), startAfter.length())); + if (!syncRoot.getPath().startsWith(startAfterCheck)) { + throw new InvalidPathException( + ExceptionMessage.START_AFTER_DOES_NOT_MATCH_PATH + .getMessage(startAfter, syncRoot.getPath())); + } + startAfter = startAfter.substring( + Math.min(startAfter.length(), syncRoot.getPath().length())); + if (startAfter.startsWith(AlluxioURI.SEPARATOR)) { + startAfter = startAfter.substring(1); + } + if (startAfter.equals("")) { + startAfter = null; + } + return startAfter; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java new file mode 100644 index 000000000000..6ca03a5c4540 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/DirectoryPathWaiter.java @@ -0,0 +1,71 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the 
"License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.conf.path.TrieNode; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.function.Function; + +class DirectoryPathWaiter extends BaseTask { + private static final Logger LOG = LoggerFactory.getLogger(DirectoryPathWaiter.class); + + private final TrieNode mCompletedDirs = new TrieNode<>(); + + DirectoryPathWaiter( + TaskInfo info, long startTime, Function> clientSupplier, + boolean removeOnComplete) { + super(info, startTime, clientSupplier, removeOnComplete); + } + + @Override + public synchronized boolean waitForSync(AlluxioURI path) { + while (true) { + if (mIsCompleted != null) { + return !mIsCompleted.getThrowable().isPresent(); + } + boolean completed = mCompletedDirs.getClosestTerminal(path.getPath()) + .map(result -> { + if (result.getValue().equals(path)) { + return true; + } + AlluxioURI parent = path.getParent(); + return parent != null && parent.equals(result.getValue()); + }).orElse(false); + if (completed) { + return true; + } + try { + wait(); + } catch (InterruptedException e) { + // Restore the interrupt status so callers can observe the interruption; + // previously the flag was silently swallowed here. + Thread.currentThread().interrupt(); + LOG.debug("Interrupted while waiting for synced path {}", path); + return false; + } + } + } + + @Override + public synchronized void nextCompleted(SyncProcessResult completed) { + if (!completed.isTruncated()) { + LOG.debug("Completed load of path {}", completed.getBaseLoadPath()); + mCompletedDirs.insert(completed.getBaseLoadPath().getPath()) + 
.setValue(completed.getBaseLoadPath()); + notifyAll(); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java new file mode 100644 index 000000000000..3d592ebe7f0b --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequest.java @@ -0,0 +1,157 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.retry.CountingRetry; +import alluxio.retry.RetryPolicy; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * This is a request for a single batch load sent to the UFS. + */ +public class LoadRequest implements Comparable { + private final TaskInfo mTaskInfo; + private final AlluxioURI mPath; + private final String mContinuationToken; + private final DescendantType mDescendantType; + private final long mId; + private final AlluxioURI mPreviousLoadLast; + private final boolean mIsFirstLoad; + /** + * This is the id of the load request that started a set of batches of load requests, i.e. + * the batches of loads until one is not truncated. 
+ */ + private final long mBatchSetId; + private final RetryPolicy mRetryPolicy = new CountingRetry(2); + + LoadRequest( + long id, long batchSetId, TaskInfo taskInfo, AlluxioURI path, + @Nullable String continuationToken, + @Nullable AlluxioURI previousLoadLast, + DescendantType descendantType, + boolean isFirstLoad) { + taskInfo.getStats().gotLoadRequest(); + mTaskInfo = taskInfo; + mPath = path; + mId = id; + mBatchSetId = batchSetId; + mContinuationToken = continuationToken; + mDescendantType = descendantType; + mPreviousLoadLast = previousLoadLast; + mIsFirstLoad = isFirstLoad; + } + + Optional getPreviousLoadLast() { + return Optional.ofNullable(mPreviousLoadLast); + } + + /** + * @return the batch ID, i.e. the load ID of the directory that initiated this load + * if using {@link alluxio.file.options.DirectoryLoadType#BFS} or + * {@link alluxio.file.options.DirectoryLoadType#DFS} + */ + long getBatchSetId() { + return mBatchSetId; + } + + boolean attempt() { + return mRetryPolicy.attempt(); + } + + /** + * @return the task info + */ + TaskInfo getTaskInfo() { + return mTaskInfo; + } + + /** + * @return if the load request is the first load request + */ + boolean isFirstLoad() { + return mIsFirstLoad; + } + + AlluxioURI getLoadPath() { + return mPath; + } + + /** + * @return the descendant type for this specific load request. Note + * that this may be different from the descendant type of the overall + * sync operation. For example if the {@link alluxio.file.options.DirectoryLoadType} + * is BFS or DFS and the overall descendant type is ALL, then the + * descendant type of each of the load requests will be ONE. 
+ */ + DescendantType getDescendantType() { + return mDescendantType; + } + + long getBaseTaskId() { + return mTaskInfo.getId(); + } + + /** + * @return the unique id for this specific load request + */ + long getLoadRequestId() { + return mId; + } + + @Nullable + String getContinuationToken() { + return mContinuationToken; + } + + void onError(Throwable t) { + mTaskInfo.getMdSync().onLoadRequestError(mTaskInfo.getId(), mId, t); + } + + @Override + public int compareTo(LoadRequest o) { + // First compare the directory load id + int baseTaskCmp; + switch (o.mTaskInfo.getLoadByDirectory()) { + case SINGLE_LISTING: + return Long.compare(mId, o.mId); + case DFS: + baseTaskCmp = Long.compare(o.mBatchSetId, mBatchSetId); + break; + default: + baseTaskCmp = Long.compare(mBatchSetId, o.mBatchSetId); + break; + } + if (baseTaskCmp != 0) { + return baseTaskCmp; + } + // then compare the base id + return Long.compare(mId, o.mId); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof LoadRequest) { + return compareTo((LoadRequest) obj) == 0; + } + return false; + } + + @Override + public int hashCode() { + // fix find bugs + return super.hashCode(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java new file mode 100644 index 000000000000..fa0e2a06de28 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadRequestExecutor.java @@ -0,0 +1,271 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +import alluxio.Constants; +import alluxio.collections.ConcurrentHashSet; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.util.logging.SamplingLogger; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; +import java.util.Optional; +import java.util.PriorityQueue; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicInteger; +import javax.annotation.Nullable; + +class LoadRequestExecutor implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(LoadRequestExecutor.class); + private static final Logger SAMPLING_LOG = new SamplingLogger(LOG, 5L * Constants.SECOND_MS); + + /** Limit the number of running (or completed but not yet processed) load requests. 
**/ + private final AtomicInteger mRemainingTickets; + private final int mMaxRunning; + + private final Map mPathLoaderTasks = new ConcurrentHashMap<>(); + // Loader tasks with pending loads + private final Set mPathLoaderTasksWithPendingLoads = new ConcurrentHashSet<>(); + // Same as above, except ordered by priority + private final ConcurrentLinkedDeque mPathLoaderTaskQueue = new ConcurrentLinkedDeque<>(); + // Load requests in order of to be processed + private final BlockingQueue mLoadRequests = new LinkedBlockingQueue<>(); + // Rate limited loads that are not yet ready to be run + private final PriorityQueue mRateLimited = new PriorityQueue<>(); + + private final LoadResultExecutor mResultExecutor; + + private final Thread mExecutor; + + LoadRequestExecutor(int maxRunning, LoadResultExecutor resultExecutor) { + mMaxRunning = maxRunning; + mRemainingTickets = new AtomicInteger(maxRunning); + mResultExecutor = resultExecutor; + mExecutor = new Thread(() -> { + while (!Thread.interrupted()) { + try { + runNextLoadTask(); + } catch (InterruptedException e) { + return; + } + } + LOG.info("Load request runner thread exiting"); + }, "LoadRequestRunner"); + mExecutor.start(); + registerMetrics(); + } + + synchronized void addPathLoaderTask(PathLoaderTask task) { + long id = task.getTaskInfo().getId(); + task.runOnPendingLoad(() -> hasNewLoadTask(id)); + mPathLoaderTasks.put(id, task); + mPathLoaderTaskQueue.add(id); + mPathLoaderTasksWithPendingLoads.add(id); + notifyAll(); + } + + synchronized void hasNewLoadTask(long taskId) { + if (!mPathLoaderTasksWithPendingLoads.contains(taskId)) { + mPathLoaderTaskQueue.add(taskId); + mPathLoaderTasksWithPendingLoads.add(taskId); + notifyAll(); + } + } + + private void onLoadError(LoadRequest request, Throwable t) { + // Errors are reported on an attempt basis. A reported load error does not + // lead to the sync failure because we retry on UFS load failure. The sync + // can still proceed if the following try succeeds. 
+ // Please refer to BaseTask::getState to get the sync task state. + if (t instanceof DefaultSyncProcess.MountPointNotFoundRuntimeException) { + request.getTaskInfo().getStats().reportSyncFailReason( + request, null, SyncFailReason.LOADING_MOUNT_POINT_DOES_NOT_EXIST, t); + } else { + request.getTaskInfo().getStats().reportSyncFailReason( + request, null, SyncFailReason.LOADING_UFS_IO_FAILURE, t); + } + releaseRunning(); + request.onError(t); + } + + private void processLoadResult(LoadRequest request, UfsLoadResult ufsLoadResult) { + Optional loadResult = request.getTaskInfo().getMdSync() + .onReceiveLoadRequestOutput(request.getBaseTaskId(), + request.getLoadRequestId(), ufsLoadResult); + synchronized (this) { + PathLoaderTask task = mPathLoaderTasks.get(request.getBaseTaskId()); + if (task != null && loadResult.isPresent()) { + LoadResult result = loadResult.get(); + mResultExecutor.processLoadResult(result, () -> { + releaseRunning(); + result.getTaskInfo().getStats().mProcessStarted.incrementAndGet(); + }, v -> { + result.getTaskInfo().getStats().mProcessCompleted.incrementAndGet(); + result.onProcessComplete(v); + }, result::onProcessError); + } else { + releaseRunning(); + if (loadResult.isPresent()) { + LOG.debug("Got a load result for id {} with no corresponding" + + "path loader task", request.getBaseTaskId()); + } + } + } + } + + private void runNextLoadTask() throws InterruptedException { + // loop until there is a task ready to execute + synchronized (this) { + while ((mLoadRequests.isEmpty() || mRemainingTickets.get() == 0) + && (mRateLimited.isEmpty() || !mRateLimited.peek().isReady())) { + // check if a task is ready to run, and we have tickets remaining + if (mRemainingTickets.get() > 0 && !mPathLoaderTaskQueue.isEmpty()) { + Long nextId = mPathLoaderTaskQueue.poll(); + if (nextId != null) { + checkNextLoad(nextId); + } + } else { // otherwise, sleep + long waitNanos = 0; + if (!mRateLimited.isEmpty()) { + waitNanos = 
mRateLimited.peek().getWaitTime(); + if (waitNanos <= 0) { + break; + } + } + // wait until a rate limited task is ready, or this.notifyAll() is called + if (waitNanos == 0) { + wait(); + } else { + // we only sleep if our wait time is at least 1 ms + // otherwise we spin wait (the comment previously stated the inverse of the check below) + if (waitNanos >= Constants.MS_NANO) { + NANOSECONDS.timedWait(this, waitNanos); + } + } + } + } + } + SAMPLING_LOG.info("Concurrent running ufs load tasks {}, tasks with pending load requests {}," + " rate limited pending requests {}", + mMaxRunning - mRemainingTickets.get(), mPathLoaderTasks.size(), mRateLimited.size()); + if (!mRateLimited.isEmpty() && mRateLimited.peek().isReady()) { + RateLimitedRequest request = mRateLimited.remove(); + runTask(request.mTask, request.mLoadRequest); + } else { + LoadRequest nxtRequest = mLoadRequests.take(); + PathLoaderTask task = mPathLoaderTasks.get(nxtRequest.getBaseTaskId()); + if (task != null) { + Preconditions.checkState(mRemainingTickets.decrementAndGet() >= 0); + Optional rateLimit = task.getRateLimiter().acquire(); + if (rateLimit.isPresent()) { + mRateLimited.add(new RateLimitedRequest(task, nxtRequest, rateLimit.get())); + } else { + runTask(task, nxtRequest); + } + } else { + LOG.debug("Got load request {} with task id {} with no corresponding task", + nxtRequest.getLoadRequestId(), nxtRequest.getBaseTaskId()); + } + } + } + + private synchronized void releaseRunning() { + mRemainingTickets.incrementAndGet(); + notifyAll(); + } + + synchronized void onTaskComplete(long taskId) { + mPathLoaderTasks.remove(taskId); + } + + private void runTask(PathLoaderTask task, LoadRequest loadRequest) { + try (CloseableResource client = task.getClient()) { + @Nullable String startAfter = null; + if (loadRequest.isFirstLoad()) { + startAfter = loadRequest.getTaskInfo().getStartAfter(); + } + client.get().performListingAsync(loadRequest.getLoadPath().getPath(), + loadRequest.getContinuationToken(), startAfter, + loadRequest.getDescendantType(), 
loadRequest.isFirstLoad(), + ufsLoadResult -> processLoadResult(loadRequest, ufsLoadResult), + t -> onLoadError(loadRequest, t)); + } catch (Throwable t) { + onLoadError(loadRequest, t); + } + } + + private void checkNextLoad(long id) { + PathLoaderTask task = mPathLoaderTasks.get(id); + if (task == null || task.isComplete()) { + mPathLoaderTasks.remove(id); + mPathLoaderTasksWithPendingLoads.remove(id); + return; + } + Optional nxtRequest = task.getNext(); + if (nxtRequest.isPresent()) { + try { + mLoadRequests.put(nxtRequest.get()); + mPathLoaderTaskQueue.addLast(id); + } catch (InterruptedException e) { + throw new InternalRuntimeException("Not expected to block here", e); + } + } else { + mPathLoaderTasksWithPendingLoads.remove(id); + } + } + + @Override + public void close() throws IOException { + mExecutor.interrupt(); + try { + mExecutor.join(5_000); + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for load request runner to terminate"); + } + mResultExecutor.close(); + } + + private void registerMetrics() { + MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.getMetricName( + MetricKey.MASTER_METADATA_SYNC_QUEUED_LOADS.getName()), + () -> { + synchronized (this) { + int count = 0; + for (PathLoaderTask task : mPathLoaderTasks.values()) { + count += task.getPendingLoadCount(); + } + return count; + } + }); + MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.getMetricName( + MetricKey.MASTER_METADATA_SYNC_RUNNING_LOADS.getName()), + () -> mMaxRunning - mRemainingTickets.get()); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java new file mode 100644 index 000000000000..0287441f62de --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResult.java @@ -0,0 +1,125 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.underfs.UfsLoadResult; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * This is the result of a single batch load from the UFS. + */ +public class LoadResult implements Comparable { + private final TaskInfo mTaskInfo; + private final AlluxioURI mBaseLoadPath; + private final UfsLoadResult mUfsLoadResult; + private final LoadRequest mLoadRequest; + private final AlluxioURI mPreviousLast; + private final boolean mIsFirstLoad; + + /** + * Creates a load result. + * @param loadRequest the load request + * @param baseLoadPath the base load path + * @param taskInfo the task info + * @param previousLast the previous last load item + * @param ufsLoadResult the ufs load result + * @param isFirstLoad if the load is the first load + */ + public LoadResult( + LoadRequest loadRequest, AlluxioURI baseLoadPath, TaskInfo taskInfo, + @Nullable AlluxioURI previousLast, UfsLoadResult ufsLoadResult, + boolean isFirstLoad) { + mLoadRequest = loadRequest; + mBaseLoadPath = baseLoadPath; + mTaskInfo = taskInfo; + mUfsLoadResult = ufsLoadResult; + mPreviousLast = previousLast; + mIsFirstLoad = isFirstLoad; + } + + /** + * @return true if this is the first load + */ + public boolean isFirstLoad() { + return mIsFirstLoad; + } + + /** + * @return the last item in the previous load + */ + public Optional getPreviousLast() { + return Optional.ofNullable(mPreviousLast); + } + + /** + * @return the load path + */ + public AlluxioURI getBaseLoadPath() { + return mBaseLoadPath; + } + + 
/** + * @return the ufs load result + */ + public UfsLoadResult getUfsLoadResult() { + return mUfsLoadResult; + } + + /** + * @return the task info + */ + public TaskInfo getTaskInfo() { + return mTaskInfo; + } + + void onProcessComplete(SyncProcessResult result) { + mTaskInfo.getMdSync().onProcessComplete( + mTaskInfo.getId(), mLoadRequest.getLoadRequestId(), result); + } + + void onProcessError(Throwable t) { + mTaskInfo.getMdSync().onProcessError(mTaskInfo.getId(), t); + } + + /** + * @return the load request + */ + public LoadRequest getLoadRequest() { + return mLoadRequest; + } + + @Override + public int compareTo(LoadResult o) { + int idCmp = Long.compare(mTaskInfo.getId(), o.mTaskInfo.getId()); + if (idCmp != 0) { + return idCmp; + } + return mLoadRequest.compareTo(o.mLoadRequest); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof LoadResult) { + return compareTo((LoadResult) obj) == 0; + } + return false; + } + + @Override + public int hashCode() { + // fix find bugs + return super.hashCode(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java new file mode 100644 index 000000000000..c5e6ae15dbab --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/LoadResultExecutor.java @@ -0,0 +1,67 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.util.ThreadFactoryUtils; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.function.Consumer; + +/** + * Takes {@link LoadResult} objects and processes them in an executor service. + */ +class LoadResultExecutor implements Closeable { + + private final ExecutorService mExecutor; + private final UfsSyncPathCache mSyncPathCache; + private final SyncProcess mSyncProcess; + + LoadResultExecutor( + SyncProcess syncProcess, + int executorThreads, UfsSyncPathCache syncPathCache) { + mExecutor = Executors.newFixedThreadPool(executorThreads, + ThreadFactoryUtils.build("mdsync-perform-sync-%d", true)); + mSyncPathCache = syncPathCache; + mSyncProcess = syncProcess; + } + + void processLoadResult( + LoadResult result, Runnable beforeProcessing, Consumer onComplete, + Consumer onError) { + mExecutor.submit(() -> { + beforeProcessing.run(); + try { + onComplete.accept( + mSyncProcess.performSync(result, mSyncPathCache) + ); + } catch (DefaultSyncProcess.MountPointNotFoundRuntimeException e) { + result.getTaskInfo().getStats().reportSyncFailReason( + result.getLoadRequest(), result, + SyncFailReason.PROCESSING_MOUNT_POINT_DOES_NOT_EXIST, e); + onError.accept(e); + } catch (Throwable t) { + result.getTaskInfo().getStats().reportSyncFailReason( + result.getLoadRequest(), result, SyncFailReason.PROCESSING_UNKNOWN, t); + onError.accept(t); + } + }); + } + + @Override + public void close() throws IOException { + mExecutor.shutdown(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/MetadataSyncHandler.java new file mode 100644 index 000000000000..40ccc0c2bf52 --- /dev/null +++ 
/**
 * The interactions between different task processing steps is exposed through this
 * standard interface in order to allow changes in the future, for example calling
 * separate components over the network.
 */
public class MetadataSyncHandler {

  // Tracks the set of active sync tasks; every callback below routes through it and
  // is a no-op when the task id no longer refers to an active task.
  private final TaskTracker mTaskTracker;
  @VisibleForTesting
  final DefaultFileSystemMaster mFsMaster;
  // Inode tree handed to the task when a path load completes.
  private final InodeTree mInodeTree;

  /**
   * Creates a metadata sync kernel.
   * @param taskTracker the task tracker
   * @param fsMaster the file system master
   * @param inodeTree the inode tree
   */
  public MetadataSyncHandler(
      TaskTracker taskTracker, DefaultFileSystemMaster fsMaster, InodeTree inodeTree) {
    mTaskTracker = taskTracker;
    mFsMaster = fsMaster;
    mInodeTree = inodeTree;
  }

  // Forwards a UFS load-request error to the task's path load task.
  void onLoadRequestError(long taskId, long loadId, Throwable t) {
    mTaskTracker.getActiveTask(taskId).ifPresent(
        task -> task.getPathLoadTask().onLoadRequestError(loadId, t));
  }

  // Marks the whole task as failed with the given error, if it is still active.
  void onFailed(long taskId, Throwable t) {
    mTaskTracker.getActiveTask(taskId).ifPresent(task -> {
      task.onFailed(t);
    });
  }

  // Forwards a processing error to the task's path load task.
  void onProcessError(long taskId, Throwable t) {
    mTaskTracker.getActiveTask(taskId).ifPresent(task ->
        task.getPathLoadTask().onProcessError(t));
  }

  // Notifies the task that one batch of results finished processing.
  void onEachResult(long taskId, SyncProcessResult result) {
    mTaskTracker.getActiveTask(taskId).ifPresent(task -> task.nextCompleted(result));
  }

  // Reports a task-level error to the tracker (only while the task is active).
  void onTaskError(long taskId, Throwable t) {
    mTaskTracker.getActiveTask(taskId).ifPresent(task -> mTaskTracker.taskError(taskId, t));
  }

  // Reports successful completion of the whole task to the tracker.
  void onTaskComplete(long taskId, boolean isFile) {
    mTaskTracker.taskComplete(taskId, isFile);
  }

  // Called once every load request of the path load task has finished; lets the
  // task finalize (e.g. update the inode tree) with the master's resources.
  void onPathLoadComplete(long taskId, boolean isFile) {
    mTaskTracker.getActiveTask(taskId).ifPresent(
        task -> task.onComplete(isFile, mFsMaster, mInodeTree));
  }

  /**
   * Loads a nested directory.
   * @param taskId the task id
   * @param path the load path
   */
  public void loadNestedDirectory(long taskId, AlluxioURI path) {
    mTaskTracker.getActiveTask(taskId).ifPresent(
        task -> task.getPathLoadTask().loadNestedDirectory(path));
  }

  // Converts a raw UFS load result into a LoadResult; empty when the task is
  // no longer active (result is discarded).
  Optional<LoadResult> onReceiveLoadRequestOutput(long taskId, long loadId, UfsLoadResult result) {
    return mTaskTracker.getActiveTask(taskId).flatMap(task ->
        task.getPathLoadTask().createLoadResult(loadId, result));
  }

  // Forwards completion of processing for one load request to the path load task.
  void onProcessComplete(long taskId, long loadRequestId, SyncProcessResult result) {
    mTaskTracker.getActiveTask(taskId).ifPresent(task ->
        task.getPathLoadTask().onProcessComplete(loadRequestId, result));
  }
}
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.util.RateLimiter; + +import com.codahale.metrics.Counter; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Optional; +import java.util.concurrent.PriorityBlockingQueue; +import java.util.function.Function; +import javax.annotation.Nullable; + +/** + * This is the task for handling the loading of a path from the UFS. + * It will consist of at least 1 load request. + */ +public class PathLoaderTask { + private static final Logger LOG = LoggerFactory.getLogger(PathLoaderTask.class); + + public static final Counter PROCESS_FAIL_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_PROCESSING_FAILED.getName()); + public static final Counter LOAD_FAIL_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_LOADS_FAILED.getName()); + + /** + * All load requests that are ready, but have not yet started executing. + * This must be concurrent safe as other threads will poll it to get the + * next load request. + */ + private final PriorityBlockingQueue mNextLoad; + /** + * True when the task is completed, must be volatile, as other threads + * will access it to check if they should stop polling {@link PathLoaderTask#mNextLoad}. + */ + private volatile boolean mCompleted = false; + /** + * These are all running (or ready to be run) load requests. 
+ */ + private final HashMap mRunningLoads = new HashMap<>(); + /** + * The load id that starts each load (where a load is a set of multiple load batches until + * a batch is not truncated) is stored here until the request that truncates this load + * is completed. + */ + private final HashSet mTruncatedLoads = new HashSet<>(); + private final TaskInfo mTaskInfo; + private long mNxtLoadId = 0; + private Runnable mRunOnPendingLoad; + private final RateLimiter mRateLimiter; + + private final Function> mClientSupplier; + + private DescendantType computeDescendantType() { + if (mTaskInfo.getDescendantType() == DescendantType.ALL + && mTaskInfo.getLoadByDirectory() != DirectoryLoadType.SINGLE_LISTING) { + return DescendantType.ONE; + } + return mTaskInfo.getDescendantType(); + } + + /** + * Create a new PathLoaderTask. + * @param taskInfo task info + * @param continuationToken token + * @param clientSupplier the client supplier + */ + public PathLoaderTask( + TaskInfo taskInfo, @Nullable String continuationToken, + Function> clientSupplier) { + mTaskInfo = taskInfo; + final long loadId = mNxtLoadId++; + // the first load request will get a GetStatus check on the path + // the following loads will be listings + LoadRequest firstRequest = new LoadRequest(loadId, loadId, mTaskInfo, mTaskInfo.getBasePath(), + continuationToken, null, computeDescendantType(), true); + mNextLoad = new PriorityBlockingQueue<>(); + addLoadRequest(firstRequest, true); + mClientSupplier = clientSupplier; + try (CloseableResource client = mClientSupplier.apply(mTaskInfo.getBasePath())) { + mRateLimiter = client.get().getRateLimiter(); + } + } + + RateLimiter getRateLimiter() { + return mRateLimiter; + } + + boolean isComplete() { + return mCompleted; + } + + TaskInfo getTaskInfo() { + return mTaskInfo; + } + + CloseableResource getClient() { + return mClientSupplier.apply(mTaskInfo.getBasePath()); + } + + synchronized void runOnPendingLoad(Runnable toRun) { + mRunOnPendingLoad = toRun; + } + + 
synchronized Optional createLoadResult( + long requestId, UfsLoadResult ufsLoadResult) { + if (mCompleted) { + return Optional.empty(); + } + LoadRequest originalRequest = mRunningLoads.get(requestId); + if (originalRequest == null) { + LOG.debug("Received a load result for task {} for a load that was already" + + "removed with id {}", + mTaskInfo, requestId); + return Optional.empty(); + } + TaskStats stats = mTaskInfo.getStats(); + stats.gotBatch(ufsLoadResult.getItemsCount()); + if (originalRequest.isFirstLoad() && ufsLoadResult.isFirstFile()) { + stats.setFirstLoadFile(); + } + // If truncated, need to submit a new task for the next set of items + // unless descendant type is none + boolean shouldLoadMore = originalRequest.getDescendantType() != DescendantType.NONE + && ufsLoadResult.isTruncated(); + if (shouldLoadMore) { + final long loadId = mNxtLoadId++; + addLoadRequest(new LoadRequest(loadId, originalRequest.getBatchSetId(), mTaskInfo, + originalRequest.getLoadPath(), ufsLoadResult.getContinuationToken(), + ufsLoadResult.getLastItem().orElse(null), + computeDescendantType(), false), + false); + } + return Optional.of(new LoadResult(originalRequest, originalRequest.getLoadPath(), + mTaskInfo, originalRequest.getPreviousLoadLast().orElse(null), + ufsLoadResult, originalRequest.isFirstLoad())); + } + + void loadNestedDirectory(AlluxioURI path) { + // If we are loading by directory, then we must create a new load task on each + // directory traversed + synchronized (this) { + final long loadId = mNxtLoadId++; + addLoadRequest(new LoadRequest(loadId, loadId, mTaskInfo, path, + null, null, computeDescendantType(), false), true); + } + } + + private void addLoadRequest(LoadRequest loadRequest, boolean isFirstForPath) { + mRunningLoads.put(loadRequest.getLoadRequestId(), loadRequest); + mNextLoad.add(loadRequest); + if (isFirstForPath) { + mTruncatedLoads.add(loadRequest.getBatchSetId()); + } + if (mRunOnPendingLoad != null) { + mRunOnPendingLoad.run(); + } + } + 
+ /** + * This should be called when a load request task with id is finished + * processing by the metadata sync. + * @param loadRequestId the id of the finished task + */ + void onProcessComplete(long loadRequestId, SyncProcessResult result) { + mTaskInfo.getMdSync().onEachResult(mTaskInfo.getId(), result); + boolean completed = false; + synchronized (this) { + LoadRequest request = mRunningLoads.remove(loadRequestId); + if (request != null && !result.isTruncated()) { + Preconditions.checkState(mTruncatedLoads.remove(request.getBatchSetId()), + "load request %s finished, without finding the load %s that started the batch loading", + loadRequestId, request.getBatchSetId()); + } + if (mTruncatedLoads.size() == 0 && mRunningLoads.size() == 0) { + // all sets of loads have finished + completed = true; + mCompleted = true; + } + } + if (completed) { + mTaskInfo.getMdSync().onPathLoadComplete(mTaskInfo.getId(), + result.rootPathIsFile()); + } + } + + synchronized void onProcessError(Throwable t) { + PROCESS_FAIL_COUNT.inc(); + // If there is a processing error then we fail the entire task + mTaskInfo.getStats().setProcessFailed(); + mCompleted = true; + mTaskInfo.getMdSync().onFailed(mTaskInfo.getId(), t); + } + + synchronized void onLoadRequestError(long id, Throwable t) { + LOAD_FAIL_COUNT.inc(); + mTaskInfo.getStats().gotLoadError(); + if (mCompleted) { + LOG.debug("Received a load error for task {} wit id {} after the task was completed", + mTaskInfo, id); + return; + } + LoadRequest load = mRunningLoads.get(id); + if (load == null) { + LOG.debug("Received a load error for task {} for a load that was already" + + "removed with id {}", + mTaskInfo, id); + return; + } + if (load.attempt()) { + LOG.debug("Rescheduling retry of load on path {}, with id {}, with continuation token {}" + + "after error {}", + mTaskInfo, load.getLoadRequestId(), load.getContinuationToken(), t); + addLoadRequest(load, false); + } else { + LOG.warn("Path loader task failed of load on path 
{}," + + "with id {} with continuation token {} after error {}", + mTaskInfo, load.getLoadRequestId(), load.getContinuationToken(), t); + mCompleted = true; + mTaskInfo.getStats().setLoadFailed(); + mTaskInfo.getMdSync().onFailed(mTaskInfo.getId(), t); + } + } + + synchronized void cancel() { + LOG.debug("Canceling load task on path {}", mTaskInfo); + mCompleted = true; + } + + Optional getNext() { + return Optional.ofNullable(mNextLoad.poll()); + } + + int getPendingLoadCount() { + return mNextLoad.size(); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java new file mode 100644 index 000000000000..8882099f75f7 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathSequence.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; + +import java.util.Objects; + +/** + * A path sequence. + */ +public class PathSequence { + private final AlluxioURI mStart; + private final AlluxioURI mEnd; + + /** + * Creates a path sequence. 
+ * @param start the start path + * @param end the end path + */ + public PathSequence(AlluxioURI start, AlluxioURI end) { + mStart = start; + mEnd = end; + } + + AlluxioURI getStart() { + return mStart; + } + + AlluxioURI getEnd() { + return mEnd; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PathSequence that = (PathSequence) o; + return Objects.equals(mStart, that.mStart) && Objects.equals(mEnd, that.mEnd); + } + + @Override + public int hashCode() { + return Objects.hash(mStart, mEnd); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java new file mode 100644 index 000000000000..106962f4ca35 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/PathWaiter.java @@ -0,0 +1,30 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; + +interface PathWaiter { + + /** + * The calling thread will be blocked until the given path has been synced. + * @param path the path to sync + * @return true if the sync on the path was successful, false otherwise + */ + boolean waitForSync(AlluxioURI path); + + /** + * Called on each batch of results that has completed processing. 
+ * @param completed the completed results + */ + void nextCompleted(SyncProcessResult completed); +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java new file mode 100644 index 000000000000..eb838375e6a1 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/RateLimitedRequest.java @@ -0,0 +1,60 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import com.google.common.base.Preconditions; + +import java.util.Objects; + +class RateLimitedRequest implements Comparable { + + PathLoaderTask mTask; + LoadRequest mLoadRequest; + long mPermit; + + RateLimitedRequest(PathLoaderTask task, LoadRequest loadRequest, long permit) { + mTask = Preconditions.checkNotNull(task); + mLoadRequest = Preconditions.checkNotNull(loadRequest); + mPermit = permit; + } + + public boolean isReady() { + return mTask.getRateLimiter().getWaitTimeNanos(mPermit) <= 0; + } + + public long getWaitTime() { + return mTask.getRateLimiter().getWaitTimeNanos(mPermit); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RateLimitedRequest that = (RateLimitedRequest) o; + return mPermit == that.mPermit && mTask.equals(that.mTask) + && mLoadRequest.equals(that.mLoadRequest); + } + + @Override + public int hashCode() { + return 
/**
 * The metadata sync fail reason.
 */
public enum SyncFailReason {
  // The failure cause could not be determined.
  UNKNOWN,
  // The requested sync is not supported.
  UNSUPPORTED,

  // Failure while loading metadata from the UFS due to an I/O error.
  LOADING_UFS_IO_FAILURE,
  // The mount point did not exist while loading from the UFS.
  LOADING_MOUNT_POINT_DOES_NOT_EXIST,

  // An unexpected error occurred while processing loaded metadata.
  PROCESSING_UNKNOWN,
  // An inode was updated concurrently during the sync.
  PROCESSING_CONCURRENT_UPDATE_DURING_SYNC,
  // A file did not exist while its metadata was being processed.
  PROCESSING_FILE_DOES_NOT_EXIST,
  // The mount point did not exist during processing.
  PROCESSING_MOUNT_POINT_DOES_NOT_EXIST,
}
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import com.codahale.metrics.Counter; + +/** + * The metadata sync operations. + */ +public enum SyncOperation { + // Compared but not updated + NOOP(0, SyncOperationMetrics.NOOP_COUNT), + CREATE(1, SyncOperationMetrics.CREATE_COUNT), + DELETE(2, SyncOperationMetrics.DELETE_COUNT), + // Deleted then created due to the changed file data + RECREATE(3, SyncOperationMetrics.RECREATED_COUNT), + // Metadata updated + UPDATE(4, SyncOperationMetrics.UPDATE_COUNT), + SKIPPED_DUE_TO_CONCURRENT_MODIFICATION(5, SyncOperationMetrics.SKIP_CONCURRENT_UPDATE_COUNT), + SKIPPED_ON_MOUNT_POINT(6, SyncOperationMetrics.SKIP_MOUNT_POINT_COUNT), + SKIPPED_NON_PERSISTED(7, SyncOperationMetrics.SKIPPED_NON_PERSISTED_COUNT); + + private final int mValue; + private final Counter mCounter; + + SyncOperation(int value, Counter counter) { + mValue = value; + mCounter = counter; + } + + /** + * @param value the value + * @return the enum of the value + */ + public static SyncOperation fromInteger(int value) { + switch (value) { + case 0: + return NOOP; + case 1: + return CREATE; + case 2: + return DELETE; + case 3: + return RECREATE; + case 4: + return UPDATE; + case 5: + return SKIPPED_DUE_TO_CONCURRENT_MODIFICATION; + case 6: + return SKIPPED_ON_MOUNT_POINT; + case 7: + return SKIPPED_NON_PERSISTED; + default: + throw new IllegalArgumentException("Invalid SyncOperation value: " + value); + } + } + + /** + * @return the value + */ + public int getValue() { + return mValue; + } + + /** + * @return the metric counter + */ + public Counter getCounter() { + 
return mCounter; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java new file mode 100644 index 000000000000..37317f6737db --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncOperationMetrics.java @@ -0,0 +1,40 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; + +import com.codahale.metrics.Counter; + +/** + * Sync operation metrics. 
+ */ +public class SyncOperationMetrics { + public static final Counter CREATE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_CREATED.getName()); + public static final Counter DELETE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_DELETED.getName()); + public static final Counter RECREATED_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_RECREATED.getName()); + public static final Counter UPDATE_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_UPDATED.getName()); + public static final Counter SKIP_CONCURRENT_UPDATE_COUNT + = MetricsSystem.counter( + MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_CONCURRENT_UPDATE.getName()); + public static final Counter SKIP_MOUNT_POINT_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_MOUNT_POINT.getName()); + public static final Counter NOOP_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_NOOP.getName()); + public static final Counter SKIPPED_NON_PERSISTED_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FILES_SKIPPED_NON_PERSISTED.getName()); +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java new file mode 100644 index 000000000000..280fbf1809f2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcess.java @@ -0,0 +1,29 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
/**
 * The sync process interfaces.
 */
public interface SyncProcess {
  /**
   * Performs a sync.
   * @param loadResult the UFS load result
   * @param syncPathCache the sync path cache for updating the last sync time
   * @return the sync process result
   * @throws Throwable on any failure; the caller (see LoadResultExecutor) catches
   *         it, records a {@link SyncFailReason} in the task stats, and reports the
   *         error back through its error callback
   */
  SyncProcessResult performSync(
      LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable;
}
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.ConcurrentHashSet; +import alluxio.file.options.DescendantType; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.master.file.BlockDeletionContext; +import alluxio.master.file.FileSystemJournalEntryMerger; +import alluxio.master.file.RpcContext; +import alluxio.master.file.contexts.OperationContext; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.journal.FileSystemMergeJournalContext; +import alluxio.master.journal.MetadataSyncMergeJournalContext; + +import com.google.common.base.Preconditions; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Set; + +/** + * The context for the metadata sync processing. + */ +public class SyncProcessContext implements Closeable { + private final DescendantType mDescendantType; + private final MetadataSyncRpcContext mRpcContext; + private final RpcContext mBaseRpcContext; + private final boolean mAllowConcurrentModification; + private final FileSystemMasterCommonPOptions mCommonOptions; + private final Set mDirectoriesToUpdateAbsentCache = new ConcurrentHashSet<>(); + private final TaskInfo mTaskInfo; + private final LoadResult mLoadResult; + + /** + * Creates a metadata sync context. 
+ * + * @param loadResult the load UFS result + * @param baseRpcContext the base rpc context + * @param rpcContext the metadata sync rpc context + * @param commonOptions the common options for TTL configurations + */ + private SyncProcessContext( + LoadResult loadResult, RpcContext baseRpcContext, MetadataSyncRpcContext rpcContext, + FileSystemMasterCommonPOptions commonOptions, + boolean allowConcurrentModification + ) { + mDescendantType = loadResult.getLoadRequest().getDescendantType(); + mRpcContext = rpcContext; + mBaseRpcContext = baseRpcContext; + mCommonOptions = commonOptions; + mAllowConcurrentModification = allowConcurrentModification; + mTaskInfo = loadResult.getTaskInfo(); + mLoadResult = loadResult; + } + + /** + * @return the descendant type of the sync + * NONE -> only syncs the inode itself + * ONE -> syncs the inode and its direct children + * ALL -> recursively syncs a directory + */ + public DescendantType getDescendantType() { + return mDescendantType; + } + + /** + * During the sync, the inodes might be updated by other requests concurrently, that makes + * the sync operation stale. If the concurrent modification is allowed, these inodes will be + * skipped, otherwise the sync will fail. + * + * @return true, if the concurrent modification is allowed. 
Otherwise, false + */ + public boolean isConcurrentModificationAllowed() { + return mAllowConcurrentModification; + } + + /** + * @return if the sync is a recursive sync + */ + public boolean isRecursive() { + return mDescendantType == DescendantType.ALL; + } + + /** + * @return the rpc context + */ + public MetadataSyncRpcContext getRpcContext() { + return mRpcContext; + } + + /** + * @return the metadata sync journal context + */ + public MetadataSyncMergeJournalContext getMetadataSyncJournalContext() { + return mRpcContext.getJournalContext(); + } + + /** + * @return the common options + */ + public FileSystemMasterCommonPOptions getCommonOptions() { + return mCommonOptions; + } + + /** + * adds directories which are supposed to update is children loaded flag when the sync is done. + * + * @param path the path + */ + public void addDirectoriesToUpdateIsChildrenLoaded(AlluxioURI path) { + mTaskInfo.addPathToUpdateDirectChildrenLoaded(path); + } + + /** + * adds directories which exists and needs to update the absent cache later. + * @param path the path + */ + public void addDirectoriesToUpdateAbsentCache(AlluxioURI path) { + mDirectoriesToUpdateAbsentCache.add(path); + } + + /** + * Updates the absent cache and set directories existing. + * @param ufsAbsentPathCache the absent cache + */ + public void updateAbsentCache(UfsAbsentPathCache ufsAbsentPathCache) { + for (AlluxioURI uri: mDirectoriesToUpdateAbsentCache) { + ufsAbsentPathCache.processExisting(uri); + } + } + + /** + * reports the completion of a successful sync operation. + * + * @param operation the operation + */ + public void reportSyncOperationSuccess(SyncOperation operation) { + reportSyncOperationSuccess(operation, 1); + } + + /** + * reports the completion of a successful sync operation. 
+ * + * @param operation the operation + * @param count the number of successes + */ + public void reportSyncOperationSuccess(SyncOperation operation, long count) { + operation.getCounter().inc(count); + mTaskInfo.getStats().reportSyncOperationSuccess(operation, count); + } + + /** + * Reports a fail reason leading to the sync failure. + * + * @param reason the reason + * @param t the throwable + */ + public void reportSyncFailReason(SyncFailReason reason, Throwable t) { + mTaskInfo.getStats().reportSyncFailReason(mLoadResult.getLoadRequest(), mLoadResult, reason, t); + } + + /** + * @return the task info + */ + public TaskInfo getTaskInfo() { + return mTaskInfo; + } + + @Override + public void close() throws IOException { + mRpcContext.close(); + mBaseRpcContext.close(); + } + + static class MetadataSyncRpcContext extends RpcContext { + public MetadataSyncRpcContext( + BlockDeletionContext blockDeleter, MetadataSyncMergeJournalContext journalContext, + OperationContext operationContext) { + super(blockDeleter, journalContext, operationContext); + } + + @Override + public MetadataSyncMergeJournalContext getJournalContext() { + return (MetadataSyncMergeJournalContext) super.getJournalContext(); + } + } + + /** + * Creates a builder. + */ + public static class Builder { + private LoadResult mLoadResult; + private MetadataSyncRpcContext mRpcContext; + private RpcContext mBaseRpcContext; + private FileSystemMasterCommonPOptions mCommonOptions = DefaultSyncProcess.NO_TTL_OPTION; + private boolean mAllowConcurrentModification = true; + + /** + * Creates a builder. 
+ * + * @param rpcContext the rpc context + * @param loadResult the load UFS result + * @return a new builder + */ + public static Builder builder(RpcContext rpcContext, LoadResult loadResult) { + Preconditions.checkState( + !(rpcContext.getJournalContext() instanceof FileSystemMergeJournalContext)); + Builder builder = new Builder(); + builder.mLoadResult = loadResult; + /* + * Wrap the journal context with a MetadataSyncMergeJournalContext, which behaves + * differently in: + * 1. the journals are merged and stayed in the context until it gets flushed + * 2. when close() or flush() are called, the journal does not trigger a hard flush + * that commits the journals, instead, it only adds the journals to the async journal writer. + * During the metadata sync process, we are creating/updating many files, but we don't want + * to hard flush journals on every inode updates. + */ + builder.mBaseRpcContext = rpcContext; + builder.mRpcContext = new MetadataSyncRpcContext(rpcContext.getBlockDeletionContext(), + new MetadataSyncMergeJournalContext(rpcContext.getJournalContext(), + new FileSystemJournalEntryMerger()), rpcContext.getOperationContext()); + return builder; + } + + /** + * @param rpcContext the rpc context + * @return builder + */ + public Builder setRpcContext(MetadataSyncRpcContext rpcContext) { + mRpcContext = rpcContext; + return this; + } + + /** + * @param commonOptions the common option + * @return builder + */ + public Builder setCommonOptions(FileSystemMasterCommonPOptions commonOptions) { + mCommonOptions = commonOptions; + return this; + } + + /** + * @param allowModification the current modification is allowed + * @return the builder + */ + public Builder setAllowModification(boolean allowModification) { + mAllowConcurrentModification = allowModification; + return this; + } + + /** + * @return the built metadata sync context + */ + public SyncProcessContext build() { + return new SyncProcessContext( + mLoadResult, mBaseRpcContext, mRpcContext, 
mCommonOptions, + mAllowConcurrentModification); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java new file mode 100644 index 000000000000..315ae51cd7c2 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/SyncProcessResult.java @@ -0,0 +1,84 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; + +import java.util.Optional; +import javax.annotation.Nullable; + +/** + * This is the result of performing the metadata sync in Alluxio. + */ +public class SyncProcessResult { + + private final AlluxioURI mBaseLoadPath; + private final TaskInfo mTaskInfo; + private final PathSequence mLoaded; + private final boolean mIsTruncated; + private final boolean mRootPathIsFile; + + /** + * Constructs an instance of {@link SyncProcessResult}. 
+ * + * @param taskInfo the task info + * @param baseLoadPath the base load path + * @param loaded the path sequence + * @param isTruncated whether the result is truncated or not + * @param rootPathIsFile whether the root path is a file or not + */ + public SyncProcessResult( + TaskInfo taskInfo, AlluxioURI baseLoadPath, + @Nullable PathSequence loaded, boolean isTruncated, + boolean rootPathIsFile) { + mRootPathIsFile = rootPathIsFile; + mBaseLoadPath = baseLoadPath; + mTaskInfo = taskInfo; + mLoaded = loaded; + mIsTruncated = isTruncated; + } + + /** + * @return true if the root path is a file, false otherwise + */ + public boolean rootPathIsFile() { + return mRootPathIsFile; + } + + /** + * @return the base load path + */ + public AlluxioURI getBaseLoadPath() { + return mBaseLoadPath; + } + + /** + * @return true if the result is truncated, false otherwise + */ + public boolean isTruncated() { + return mIsTruncated; + } + + /** + * @return Optional containing the loaded path sequence + */ + public Optional getLoaded() { + return Optional.ofNullable(mLoaded); + } + + /** + * @return the task info + */ + public TaskInfo getTaskInfo() { + return mTaskInfo; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java new file mode 100644 index 000000000000..5253b46a872f --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskGroup.java @@ -0,0 +1,113 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.annotation.SuppressFBWarnings; +import alluxio.exception.runtime.DeadlineExceededRuntimeException; +import alluxio.grpc.SyncMetadataTask; + +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; + +import java.util.Arrays; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +/** + * A TaskGroup represents a set of {@link BaseTask} objects. + */ +public class TaskGroup { + @SuppressFBWarnings(value = "EI_EXPOSE_REP2") + private final BaseTask[] mTasks; + private final long mGroupId; + + /** + * Creates a new task group. + * @param groupId the id for this task group + * @param tasks the tasks to group + */ + public TaskGroup(long groupId, BaseTask... tasks) { + Preconditions.checkState(tasks != null && tasks.length > 0); + mGroupId = groupId; + mTasks = tasks; + } + + /** + * @return the base task for this group + */ + public BaseTask getBaseTask() { + return mTasks[0]; + } + + /** + * @return a stream of the tasks + */ + public Stream getTasks() { + return Arrays.stream(mTasks); + } + + /** + * @return the task count + */ + public int getTaskCount() { + return mTasks.length; + } + + /** + * @return true if all tasks succeeded + */ + public boolean allSucceeded() { + return Arrays.stream(mTasks).allMatch(BaseTask::succeeded); + } + + /** + * @return a stream of the tasks in protobuf format + */ + public Stream toProtoTasks() { + return getTasks().map(BaseTask::toProtoTask); + } + + /** + * @return the unique group id for this task + */ + public long getGroupId() { + return mGroupId; + } + + /** + * Waits for all the tasks to complete or until + * a timeout occurs. If any tasks fail it will throw the + * error caused by the failed task. + * If the wait times-out a {@link DeadlineExceededRuntimeException} is thrown. 
+ * @param timeoutMs the time in milliseconds to wait for the task + * to complete, or 0 to wait forever + */ + public void waitAllComplete(long timeoutMs) throws Throwable { + Stopwatch sw = Stopwatch.createStarted(); + for (BaseTask task : mTasks) { + task.waitComplete(getRemainingTime(sw, timeoutMs)); + } + } + + private static long getRemainingTime( + Stopwatch sw, long timeoutMs) throws DeadlineExceededRuntimeException { + // Endless wait + if (timeoutMs == 0) { + return 0; + } + long remaining = timeoutMs - sw.elapsed(TimeUnit.MILLISECONDS); + if (remaining <= 0) { + throw new DeadlineExceededRuntimeException("Task still running."); + } + return remaining; + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java new file mode 100644 index 000000000000..197b5092376b --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskInfo.java @@ -0,0 +1,149 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.conf.path.TrieNode; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; + +import java.util.stream.Stream; +import javax.annotation.Nullable; + +/** + * This represents the overall metadata sync task information. 
+ */ +public class TaskInfo { + private final AlluxioURI mBasePath; + private final AlluxioURI mAlluxioPath; + private final String mStartAfter; + private final DescendantType mDescendantType; + private final long mId; + private final DirectoryLoadType mLoadByDirectory; + private final long mSyncInterval; + private final MetadataSyncHandler mMetadataSyncHandler; + private final TaskStats mStats; + + private final TrieNode mPathsToUpdateDirectChildrenLoaded = new TrieNode<>(); + + TaskInfo( + MetadataSyncHandler metadataSyncHandler, + AlluxioURI ufsPath, // basePath should be without the header/bucket, e.g. no s3:// + AlluxioURI alluxioPath, + @Nullable String startAfter, + DescendantType descendantType, + long syncInterval, + DirectoryLoadType loadByDirectory, + long id) { + mBasePath = ufsPath; + mAlluxioPath = alluxioPath; + mSyncInterval = syncInterval; + mDescendantType = descendantType; + mLoadByDirectory = loadByDirectory; + mId = id; + mStartAfter = startAfter; + mMetadataSyncHandler = metadataSyncHandler; + mStats = new TaskStats(); + } + + /** + * @return the task stats + */ + public TaskStats getStats() { + return mStats; + } + + /** + * @return the alluxio path + */ + public AlluxioURI getAlluxioPath() { + return mAlluxioPath; + } + + /** + * @return the sync interval + */ + public long getSyncInterval() { + return mSyncInterval; + } + + /** + * @return true, if the task contains dir load tasks + */ + public boolean hasDirLoadTasks() { + return mDescendantType == DescendantType.ALL + && mLoadByDirectory != DirectoryLoadType.SINGLE_LISTING; + } + + String getStartAfter() { + return mStartAfter; + } + + /** + * @return the metadata sync kernel + */ + public MetadataSyncHandler getMdSync() { + return mMetadataSyncHandler; + } + + /** + * @return the base path + */ + public AlluxioURI getBasePath() { + return mBasePath; + } + + /** + * @return the id + */ + public long getId() { + return mId; + } + + /** + * @return the load by directory type + */ + 
DirectoryLoadType getLoadByDirectory() { + return mLoadByDirectory; + } + + /** + * @return the descendant type + */ + public DescendantType getDescendantType() { + return mDescendantType; + } + + @Override + public String toString() { + return String.format( + "TaskInfo{UFS path: %s, AlluxioPath: %s, Descendant Type: %s," + + " Directory Load Type: %s, Id: %d}", mBasePath, mAlluxioPath, + mDescendantType, mLoadByDirectory, mId); + } + + /** + * @return the paths need to update direct children loaded + */ + synchronized Stream getPathsToUpdateDirectChildrenLoaded() { + return mPathsToUpdateDirectChildrenLoaded.getLeafChildren("/").map(TrieNode::getValue); + } + + /** + * Add path to set direct children loaded. This call must be synchronized + * as it will be called by different threads while processing tasks. + * @param uri to update direct children loaded + */ + synchronized void addPathToUpdateDirectChildrenLoaded(AlluxioURI uri) { + mPathsToUpdateDirectChildrenLoaded.insert(uri.getPath()).setValue(uri); + } +} diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java new file mode 100644 index 000000000000..e2615dc9334d --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskStats.java @@ -0,0 +1,279 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.collections.Pair; + +import com.google.common.base.MoreObjects; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import javax.annotation.Nullable; + +/** + * The metadata sync task stats. + */ +public class TaskStats { + private final AtomicInteger mBatches = new AtomicInteger(); + private final AtomicInteger mStatuses = new AtomicInteger(); + private final AtomicInteger mLoadErrors = new AtomicInteger(); + private final AtomicInteger mLoadRequests = new AtomicInteger(); + final AtomicInteger mProcessStarted = new AtomicInteger(); + final AtomicInteger mProcessCompleted = new AtomicInteger(); + private final AtomicLong[] mSuccessOperationCount; + private final Map mSyncFailReasons = + new ConcurrentHashMap<>(); + private volatile boolean mLoadFailed; + private volatile boolean mProcessFailed; + private volatile boolean mFirstLoadFile; + private volatile boolean mSyncFailed = false; + + /** + * Creates a new task stats. 
+ */ + public TaskStats() { + mSuccessOperationCount = new AtomicLong[SyncOperation.values().length]; + for (int i = 0; i < mSuccessOperationCount.length; ++i) { + mSuccessOperationCount[i] = new AtomicLong(); + } + } + + @Override + public String toString() { + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this) + .add("Success op count", getSuccessOperationCountString().getSecond()) + .add("# of batches", mBatches.get()) + .add("# of objects loaded from UFS", mStatuses.get()) + .add("# of load requests", mLoadRequests.get()) + .add("# of load errors", mLoadErrors.get()) + .add("Load failed", mLoadFailed) + .add("Process failed", mProcessFailed) + .add("First load was file", mFirstLoadFile) + .add("Failed load requests", mSyncFailReasons); + return helper.toString(); + } + + /** + * @return a formatted string that is used to display as the cli command output + */ + public Pair toReportString() { + Pair successOps = getSuccessOperationCountString(); + MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this); + helper.add("Success op count", successOps.getSecond()) + .add("# of batches", mBatches.get()) + .add("# of objects loaded from UFS", mStatuses.get()) + .add("# of load requests", mLoadRequests.get()) + .add("# of load errors", mLoadErrors.get()); + if (mSyncFailReasons.size() > 0) { + helper.add("Failed load requests", mSyncFailReasons); + } + return new Pair<>(successOps.getFirst(), helper.toString()); + } + + /** + * @return if the first load was file + */ + boolean firstLoadWasFile() { + return mFirstLoadFile; + } + + /** + * @return if the load is failed + */ + boolean isLoadFailed() { + return mLoadFailed; + } + + /** + * @return if the processing is failed + */ + boolean isProcessFailed() { + return mProcessFailed; + } + + int getLoadRequestCount() { + return mLoadRequests.get(); + } + + int getBatchCount() { + return mBatches.get(); + } + + /** + * @return the status count + */ + int getStatusCount() { + return 
mStatuses.get(); + } + + int getLoadErrors() { + return mLoadErrors.get(); + } + + void gotBatch(int size) { + mBatches.incrementAndGet(); + mStatuses.addAndGet(size); + } + + void gotLoadRequest() { + mLoadRequests.incrementAndGet(); + } + + void gotLoadError() { + mLoadErrors.incrementAndGet(); + } + + void setLoadFailed() { + mLoadFailed = true; + } + + void setProcessFailed() { + mProcessFailed = true; + } + + void setFirstLoadFile() { + mFirstLoadFile = true; + } + + /** + * @return success operation count map + */ + public AtomicLong[] getSuccessOperationCount() { + return mSuccessOperationCount; + } + + private Pair getSuccessOperationCountString() { + StringBuilder sb = new StringBuilder(); + sb.append("{"); + long total = 0; + for (int i = 0; i < mSuccessOperationCount.length; ++i) { + long value = mSuccessOperationCount[i].get(); + total += value; + if (value != 0) { + sb.append("[") + .append(SyncOperation.fromInteger(i)) + .append(":") + .append(value) + .append("]"); + } + } + sb.append("}"); + return new Pair<>(total, sb.toString()); + } + + /** + * reports the completion of a successful sync operation. + * + * @param operation the operation + * @param count the number of successes + */ + void reportSyncOperationSuccess(SyncOperation operation, long count) { + mSuccessOperationCount[operation.getValue()].addAndGet(count); + } + + /** + * Sets the sync failed. + */ + public void setSyncFailed() { + mSyncFailed = true; + } + + /** + * @return if the sync failed + */ + public boolean getSyncFailed() { + return mSyncFailed; + } + + /** + * Reports a sync fail reason. 
+   * @param request the load request
+   * @param loadResult the load result
+   * @param reason the sync fail reason
+   * @param t the exception
+   */
+  void reportSyncFailReason(
+      LoadRequest request, @Nullable LoadResult loadResult,
+      SyncFailReason reason, Throwable t) {
+    // putIfAbsent: only the first failure reported for a given load request is retained.
+    mSyncFailReasons.putIfAbsent(
+        request.getLoadRequestId(), new SyncFailure(request, loadResult, reason, t)
+    );
+  }
+
+  /**
+   * @return the sync fail reason map
+   * The key is the load request id and the value is the failure.
+   * A reported error does not necessarily fail the sync as we retry. This map only records all
+   * failures we ever encountered. Please refer to BaseTask::getState to get the sync task state.
+   */
+  public Map getSyncFailReasons() {
+    return mSyncFailReasons;
+  }
+
+  /**
+   * The sync failure. Captures one failed load request: the request itself, its
+   * (possibly absent) load result, the failure reason category, and the causing exception.
+   */
+  public static class SyncFailure {
+    private final LoadRequest mLoadRequest;
+    @Nullable
+    private final LoadResult mLoadResult;
+    private final Throwable mThrowable;
+    private final SyncFailReason mFailReason;
+
+    /**
+     * Constructs an object.
+     * @param loadRequest the load request
+     * @param loadResult the load result
+     * @param failReason the fail reason
+     * @param throwable the exception
+     */
+    public SyncFailure(
+        LoadRequest loadRequest, @Nullable LoadResult loadResult,
+        SyncFailReason failReason, Throwable throwable) {
+      mLoadRequest = loadRequest;
+      mLoadResult = loadResult;
+      mThrowable = throwable;
+      mFailReason = failReason;
+    }
+
+    /**
+     * @return the sync fail reason
+     */
+    public SyncFailReason getSyncFailReason() {
+      return mFailReason;
+    }
+
+    @Override
+    public String toString() {
+      // "{beginning}" / "{N/A}" are placeholder markers when no boundary item is known.
+      String loadFrom = "{beginning}";
+      if (mLoadRequest.getPreviousLoadLast().isPresent()) {
+        loadFrom = mLoadRequest.getPreviousLoadLast().get().toString();
+      }
+      String loadUntil = "{N/A}";
+      if (mLoadResult != null && mLoadResult.getUfsLoadResult().getLastItem().isPresent()) {
+        loadUntil = mLoadResult.getUfsLoadResult().getLastItem().get().toString();
+      }
+
+      MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this)
+          .add("LoadRequestId", mLoadRequest.getLoadRequestId())
+          .add("FailReason", mFailReason)
+          .add("DescendantType", mLoadRequest.getDescendantType())
+          // NOTE(review): "LoadPath" repeats the request id (same value as "LoadRequestId"
+          // above) — looks like a copy-paste slip; presumably the request's load path was
+          // intended. Confirm against LoadRequest's accessors.
+          .add("LoadPath", mLoadRequest.getLoadRequestId())
+          .add("LoadFrom", loadFrom)
+          .add("LoadUntil", loadUntil)
+          .add("Exception", mThrowable);
+      return helper.toString();
+    }
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java
new file mode 100644
index 000000000000..ec521ce83aab
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/mdsync/TaskTracker.java
@@ -0,0 +1,329 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License").
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.conf.path.TrieNode; +import alluxio.exception.status.NotFoundException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.SyncMetadataTask; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import com.codahale.metrics.Counter; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Optional; +import java.util.function.Function; +import javax.annotation.Nullable; + +/** + * Tracks metadata sync tasks. The tasks will be submitted by UFS URL by user RPC threads. 
+ */ +public class TaskTracker implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(TaskTracker.class); + + private final TrieNode mActiveRecursiveListTasks; + private final TrieNode mActiveListTasks; + private final TrieNode mActiveStatusTasks; + private final HashMap mActiveTaskMap = new HashMap<>(); + // TODO(elega) make this a configurable property + private final Cache mFinishedTaskMap = + CacheBuilder.newBuilder().maximumSize(1000).build(); + private final LoadRequestExecutor mLoadRequestExecutor; + private final UfsSyncPathCache mSyncPathCache; + private final UfsAbsentPathCache mAbsentPathCache; + private final Function> mClientSupplier; + + public static final Counter COMPLETED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_COMPLETED_TASKS.getName()); + public static final Counter FAILED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_FAILED_TASKS.getName()); + public static final Counter CANCELLED_TASK_COUNT + = MetricsSystem.counter(MetricKey.MASTER_METADATA_SYNC_CANCELLED_TASKS.getName()); + + private long mNxtId = 0; + + /** + * Create a new TaskTracker. 
+ * @param executorThreads the number of threads to run the metadata sync processing + * @param maxUfsRequests the maximum number of concurrently running + * (or completed but not yet processed) Ufs requests + * @param allowConcurrentNonRecursiveList if true, non-recursive lists tasks will + * run concurrently with recursive list tasks + * @param allowConcurrentGetStatus if true, getStatus tasks will run concurrently + * with recursive list tasks + * @param syncPathCache the sync path cache + * @param absentPathCache the absent cache + * @param syncProcess the sync process + * @param clientSupplier the client supplier + */ + public TaskTracker( + int executorThreads, int maxUfsRequests, + boolean allowConcurrentGetStatus, boolean allowConcurrentNonRecursiveList, + UfsSyncPathCache syncPathCache, + UfsAbsentPathCache absentPathCache, + SyncProcess syncProcess, + Function> clientSupplier) { + LOG.info("Metadata sync executor threads {}, max concurrent ufs requests {}", + executorThreads, maxUfsRequests); + mSyncPathCache = syncPathCache; + mAbsentPathCache = absentPathCache; + mLoadRequestExecutor = new LoadRequestExecutor(maxUfsRequests, + new LoadResultExecutor(syncProcess, executorThreads, syncPathCache)); + mActiveRecursiveListTasks = new TrieNode<>(); + if (allowConcurrentNonRecursiveList) { + mActiveListTasks = new TrieNode<>(); + } else { + mActiveListTasks = mActiveRecursiveListTasks; + } + if (allowConcurrentGetStatus) { + mActiveStatusTasks = new TrieNode<>(); + } else { + mActiveStatusTasks = mActiveRecursiveListTasks; + } + mClientSupplier = clientSupplier; + registerMetrics(); + } + + /** + * @param taskId the task id + * @return the task + */ + public synchronized Optional getActiveTask(long taskId) { + return Optional.ofNullable(mActiveTaskMap.get(taskId)); + } + + /** + * @param taskId the task id + * @return the task + */ + public synchronized Optional getTaskProto(long taskId) { + BaseTask task = mActiveTaskMap.get(taskId); + if (task != null) { + 
return Optional.of(task.toProtoTask()); + } + return Optional.ofNullable(mFinishedTaskMap.getIfPresent(taskId)); + } + + synchronized boolean hasRunningTasks() { + return mActiveListTasks.getCommonRoots().hasNext() + || mActiveStatusTasks.getCommonRoots().hasNext() + || mActiveRecursiveListTasks.getCommonRoots().hasNext(); + } + + void taskComplete(long taskId, boolean isFile) { + synchronized (this) { + BaseTask baseTask = mActiveTaskMap.get(taskId); + if (baseTask != null) { + if (!baseTask.removeOnComplete()) { + mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + COMPLETED_TASK_COUNT.inc(); + mActiveTaskMap.remove(taskId); + LOG.debug("Task {} completed", baseTask); + mSyncPathCache.notifySyncedPath(baseTask.getTaskInfo().getBasePath(), + baseTask.getTaskInfo().getDescendantType(), baseTask.getStartTime(), + null, isFile); + if (baseTask.getTaskInfo().getStats().getStatusCount() == 0) { + mAbsentPathCache.addSinglePath(baseTask.getTaskInfo().getBasePath()); + } else { + mAbsentPathCache.processExisting(baseTask.getTaskInfo().getBasePath()); + } + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), + "task missing").setValue(null); + } else { + LOG.debug("Task with id {} completed, but was already removed", taskId); + } + } + mLoadRequestExecutor.onTaskComplete(taskId); + } + + void taskError(long taskId, Throwable t) { + synchronized (this) { + BaseTask baseTask = mActiveTaskMap.remove(taskId); + if (baseTask != null) { + FAILED_TASK_COUNT.inc(); + LOG.debug("Task {} failed with error {}", baseTask, t); + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), + "task missing").setValue(null); + if (!baseTask.removeOnComplete()) { + 
mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + } else { + LOG.debug("Task with id {} failed with error, but was already removed", taskId, t); + } + } + mLoadRequestExecutor.onTaskComplete(taskId); + } + + synchronized void cancelTasksUnderPath(AlluxioURI path) { + mActiveRecursiveListTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + mActiveListTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + mActiveStatusTasks.getLeafChildren(path.toString()).forEach(nxt -> + mActiveTaskMap.remove(nxt.getValue().cancel())); + } + + /** + * Cancels an ongoing sync task. + * @param taskId the task id + */ + public synchronized void cancelTaskById(long taskId) throws NotFoundException { + BaseTask baseTask = mActiveTaskMap.get(taskId); + if (baseTask == null) { + throw new NotFoundException("Task " + taskId + " not found or has already been canceled."); + } + if (baseTask.isCompleted().isPresent()) { + return; + } + if (!baseTask.removeOnComplete()) { + mFinishedTaskMap.put(taskId, baseTask.toProtoTask()); + } + CANCELLED_TASK_COUNT.inc(); + mActiveTaskMap.remove(taskId); + baseTask.cancel(); + TrieNode activeTasks = getActiveTasksForDescendantType( + baseTask.getTaskInfo().getDescendantType()); + Preconditions.checkNotNull(activeTasks.deleteIf( + baseTask.getTaskInfo().getBasePath().toString(), a -> true), "task missing") + .setValue(null); + } + + private TrieNode getActiveTasksForDescendantType(DescendantType depth) { + switch (depth) { + case NONE: + return mActiveStatusTasks; + case ONE: + return mActiveListTasks; + default: + return mActiveRecursiveListTasks; + } + } + + /** + * Launches a metadata sync task asynchronously with the given parameters. + * This function should be used when manually launching metadata sync tasks. 
+   * @param metadataSyncHandler the metadata sync handler
+   * @param ufsPath the ufsPath to sync
+   * @param alluxioPath the alluxio path matching the mounted ufsPath
+   * @param startAfter if the sync should start after a given internal path
+   * @param depth the depth of descendants to load
+   * @param syncInterval the sync interval
+   * @param loadByDirectory the load by directory type
+   * @param removeOnComplete if the task should be removed on complete
+   * @return the running task object
+   */
+  public BaseTask launchTaskAsync(
+      MetadataSyncHandler metadataSyncHandler,
+      AlluxioURI ufsPath, AlluxioURI alluxioPath,
+      @Nullable String startAfter,
+      DescendantType depth, long syncInterval,
+      DirectoryLoadType loadByDirectory,
+      boolean removeOnComplete) {
+    BaseTask task;
+    synchronized (this) {
+      // Reuse an already-running task that covers this path at the requested depth;
+      // otherwise register and start a new one.
+      TrieNode activeTasks = getActiveTasksForDescendantType(depth);
+      task = activeTasks.getLeafChildren(ufsPath.toString())
+          .map(TrieNode::getValue).filter(nxt -> nxt.pathIsCovered(ufsPath, depth)).findFirst()
+          .orElseGet(() -> {
+            TrieNode newNode = activeTasks.insert(ufsPath.toString());
+            Preconditions.checkState(newNode.getValue() == null);
+            final long id = mNxtId++;
+            BaseTask newTask = BaseTask.create(
+                new TaskInfo(metadataSyncHandler, ufsPath, alluxioPath, startAfter,
+                    depth, syncInterval, loadByDirectory, id),
+                mSyncPathCache.recordStartSync(),
+                mClientSupplier,
+                removeOnComplete);
+            mActiveTaskMap.put(id, newTask);
+            newNode.setValue(newTask);
+            mLoadRequestExecutor.addPathLoaderTask(newTask.getLoadTask());
+            return newTask;
+          });
+    }
+    return task;
+  }
+
+  /**
+   * Launches a metadata sync task with the given parameters.
+   * This function should be used when traversing the tree, and the
+   * path being traversed needs a sync.
+   * This method will not return until the initial sync path has been
+   * synchronized. For example if the alluxio sync path is "/mount/file"
+   * it will not return until "file" has been synchronized.
If instead
+   * the path being synchronized is a directory, e.g. "/mount/directory/"
+   * then the function will return as soon as the first batch of items
+   * in the directory has been synchronized, e.g. "/mount/directory/first",
+   * allowing the user to start listing the file before the sync has been
+   * completed entirely. As the directory is traversed, this function should
+   * be called on each subsequent path until the sync is complete.
+   * TODO(tcrain) integrate this in the filesystem operations traversal
+   * @param metadataSyncHandler the metadata sync handler
+   * @param ufsPath the ufsPath to sync
+   * @param alluxioPath the alluxio path matching the mounted ufsPath
+   * @param startAfter if the sync should start after a given internal path
+   * @param depth the depth of descendants to load
+   * @param syncInterval the sync interval
+   * @param loadByDirectory the load by directory type
+   * @return the running task object
+   */
+  @VisibleForTesting
+  public Pair checkTask(
+      MetadataSyncHandler metadataSyncHandler,
+      AlluxioURI ufsPath, AlluxioURI alluxioPath,
+      @Nullable String startAfter,
+      DescendantType depth, long syncInterval,
+      DirectoryLoadType loadByDirectory) {
+    // TODO(elega/tcrain) This method needs to be updated to support nested sync
+    BaseTask task = launchTaskAsync(metadataSyncHandler, ufsPath, alluxioPath, startAfter,
+        depth, syncInterval, loadByDirectory, true);
+    return new Pair<>(task.waitForSync(ufsPath), task);
+  }
+
+  @Override
+  public void close() throws IOException {
+    mLoadRequestExecutor.close();
+  }
+
+  // Exposes the number of active sync tasks as a gauge metric.
+  private void registerMetrics() {
+    MetricsSystem.registerGaugeIfAbsent(
+        MetricsSystem.getMetricName(
+            MetricKey.MASTER_METADATA_SYNC_RUNNING_TASKS.getName()),
+        () -> {
+          synchronized (this) {
+            return mActiveTaskMap.size();
+          }
+        });
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java
new file mode
100644
index 000000000000..6eb70cd41fc5
--- /dev/null
+++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeIterationResult.java
@@ -0,0 +1,50 @@
+/*
+ * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
+ * (the "License"). You may not use this work except in compliance with the License, which is
+ * available at www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied, as more fully set forth in the License.
+ *
+ * See the NOTICE file distributed with this work for information regarding copyright ownership.
+ */
+
+package alluxio.master.file.meta;
+
+/**
+ * An inode together with the {@link LockedInodePath} for its full path, as produced
+ * while iterating the inode tree.
+ */
+public class InodeIterationResult {
+  private final Inode mInode;
+  private final LockedInodePath mLockedPath;
+
+  /**
+   * Creates an instance.
+   * @param inode the inode
+   * @param lockedPath the locked path
+   */
+  public InodeIterationResult(
+      Inode inode, LockedInodePath lockedPath) {
+    mInode = inode;
+    mLockedPath = lockedPath;
+  }
+
+  /**
+   * @return the inode
+   */
+  public Inode getInode() {
+    return mInode;
+  }
+
+  /**
+   * @return the locked path
+   */
+  public LockedInodePath getLockedPath() {
+    return mLockedPath;
+  }
+
+  @Override
+  public String toString() {
+    // Renders as the URI of the locked path.
+    return mLockedPath.getUri().toString();
+  }
+}
diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java
index 9c4a45d172b2..fa54fa301d2c 100644
--- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java
+++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java
@@ -874,7 +874,7 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath,
     InodeDirectoryView currentInodeDirectory = ancestorInode.asDirectory();
     List createdInodes = new ArrayList<>();
-    if
(context.isPersisted()) { + if (context.isPersisted() && context.isPersistNonExistingParentDirectories()) { // Synchronously persist directories. These inodes are already READ locked. for (Inode inode : inodePath.getInodeList()) { if (!inode.isPersisted()) { @@ -913,23 +913,34 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, // NOTE, we set the mode of missing ancestor directories to be the default value, rather // than inheriting the option of the final file to create, because it may not have // "execute" permission. - CreateDirectoryContext missingDirContext = CreateDirectoryContext.defaults(); - missingDirContext.getOptions().setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() - .setTtl(context.getTtl()).setTtlAction(context.getTtlAction())); - missingDirContext.setWriteType(context.getWriteType()); - missingDirContext.setOperationTimeMs(context.getOperationTimeMs()); - missingDirContext.setMountPoint(false); - missingDirContext.setOwner(context.getOwner()); - missingDirContext.setGroup(context.getGroup()); - if (context.getXAttr() != null - && context.getXAttrPropStrat() != null - && context.getXAttrPropStrat() == XAttrPropagationStrategy.NEW_PATHS) { - missingDirContext.setXAttr(context.getXAttr()); - } StringBuilder pathBuilder = new StringBuilder().append( String.join(AlluxioURI.SEPARATOR, Arrays.asList(pathComponents).subList(0, pathIndex)) ); + CreateDirectoryContext missingDirContext = null; + if (pathIndex < pathComponents.length - 1) { + missingDirContext = CreateDirectoryContext.defaults(); + missingDirContext.getOptions().setCommonOptions(FileSystemMasterCommonPOptions.newBuilder() + .setTtl(context.getTtl()).setTtlAction(context.getTtlAction())); + missingDirContext.setWriteType(context.getWriteType()); + missingDirContext.setOperationTimeMs(context.getOperationTimeMs()); + missingDirContext.setMountPoint(false); + missingDirContext.setOwner(context.getOwner()); + missingDirContext.setGroup(context.getGroup()); + 
if (context.isMetadataLoad() && !context.isPersistNonExistingParentDirectories()) { + // If this is a metadata load, and we are not going to persist internal + // directories (i.e. adding object markers), then we mark the internal + // directories as persisted + missingDirContext.setWriteType(WriteType.THROUGH); + missingDirContext.setMissingDirFingerprint(context::getMissingDirFingerprint); + } + if (context.getXAttr() != null + && context.getXAttrPropStrat() != null + && context.getXAttrPropStrat() == XAttrPropagationStrategy.NEW_PATHS) { + missingDirContext.setXAttr(context.getXAttr()); + } + } for (int k = pathIndex; k < (pathComponents.length - 1); k++) { + assert missingDirContext != null; MutableInodeDirectory newDir = MutableInodeDirectory.create( mDirectoryIdGenerator.getNewDirectoryId(rpcContext.getJournalContext()), currentInodeDirectory.getId(), pathComponents[k], missingDirContext); @@ -952,6 +963,10 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, newDir.setInternalAcl(pair.getFirst()); newDir.setDefaultACL(pair.getSecond()); } + if (context.isPersisted() && !context.isPersistNonExistingParentDirectories()) { + newDir.setPersistenceState(PersistenceState.PERSISTED); + newDir.setUfsFingerprint(context.getMissingDirFingerprint()); + } String newDirPath = k == 0 ? ROOT_PATH : pathBuilder.append(AlluxioURI.SEPARATOR).append(pathComponents[k]).toString(); mState.applyAndJournal(rpcContext, newDir, @@ -961,7 +976,7 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, // Persist the directory *after* it exists in the inode tree. This prevents multiple // concurrent creates from trying to persist the same directory name. 
- if (context.isPersisted()) { + if (context.isPersisted() && context.isPersistNonExistingParentDirectories()) { syncPersistExistingDirectory(rpcContext, newDir, context.isMetadataLoad()); } createdInodes.add(Inode.wrap(newDir)); @@ -1019,7 +1034,14 @@ public List createPath(RpcContext rpcContext, LockedInodePath inodePath, newInode = newDir; } else if (context instanceof CreateFileContext) { CreateFileContext fileContext = (CreateFileContext) context; - MutableInodeFile newFile = MutableInodeFile.create(mContainerIdGenerator.getNewContainerId(), + final long blockContainerId; + if (fileContext.getCompleteFileInfo() != null) { + blockContainerId = fileContext.getCompleteFileInfo().getContainerId(); + } else { + blockContainerId = mContainerIdGenerator.getNewContainerId(); + } + + MutableInodeFile newFile = MutableInodeFile.create(blockContainerId, currentInodeDirectory.getId(), name, System.currentTimeMillis(), fileContext); // if the parent has a default ACL, copy that default ACL ANDed with the umask as the new diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java index 7c66b86a5e1f..649286a3350b 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/LockedInodePath.java @@ -409,7 +409,28 @@ public LockedInodePath lockDescendant(AlluxioURI descendantUri, LockPattern lock */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern) throws InvalidPathException { - return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName())); + return lockChild(child, lockPattern, true); + } + + /** + * Returns a new locked inode path composed of the current path plus the child inode. + * The original locked inode path is unaffected. + * The path is traversed or not depending on the shouldTraverse parameter. 
+ * + * childComponentsHint can be used to save the work of computing path components when the path + * components for the new path are already known. + * + * On failure, all locks taken by this method will be released. + * + * @param child the child inode + * @param lockPattern the lock pattern + * @param shouldTraverse if the path should be traversed or not + * @return the new locked path + */ + public LockedInodePath lockChild(Inode child, LockPattern lockPattern, boolean shouldTraverse) + throws InvalidPathException { + return lockChild(child, lockPattern, addComponent(mPathComponents, child.getName()), + shouldTraverse); } /** @@ -423,7 +444,23 @@ public LockedInodePath lockChild(Inode child, LockPattern lockPattern) */ public LockedInodePath lockChild(Inode child, LockPattern lockPattern, String[] childComponentsHint) throws InvalidPathException { - return lockChildByName(child.getName(), lockPattern, childComponentsHint); + return lockChildByName(child.getName(), lockPattern, childComponentsHint, true); + } + + /** + * Efficient version of {@link #lockChild(Inode, LockPattern)} for when the child path + * components are already known. 
+ * + * @param child the child inode + * @param lockPattern the lock pattern + * @param childComponentsHint path components for the new path + * @param shouldTraverse if the path should be traversed or not + * @return the new locked path + */ + public LockedInodePath lockChild( + Inode child, LockPattern lockPattern, String[] childComponentsHint, + boolean shouldTraverse) throws InvalidPathException { + return lockChildByName(child.getName(), lockPattern, childComponentsHint, shouldTraverse); } /** @@ -433,13 +470,16 @@ public LockedInodePath lockChild(Inode child, LockPattern lockPattern, * @param childName the name of the child inode * @param lockPattern the lock pattern * @param childComponentsHint path components for the new path + * @param shouldTraverse if the path should be traversed or not * @return the new locked path */ public LockedInodePath lockChildByName(String childName, LockPattern lockPattern, - String[] childComponentsHint) throws InvalidPathException { + String[] childComponentsHint, boolean shouldTraverse) throws InvalidPathException { LockedInodePath path = new LockedInodePath(mUri.joinUnsafe(childName), this, childComponentsHint, lockPattern, mUseTryLock); - path.traverseOrClose(); + if (shouldTraverse) { + path.traverseOrClose(); + } return path; } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java index da0ae9c4d871..1dce972be1ae 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MountTable.java @@ -356,6 +356,17 @@ public Map getMountTable() { */ public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf) throws InvalidPathException { + return containsMountPoint(uri, containsSelf, true); + } + + /** + * @param uri the Alluxio uri to check + * @param containsSelf cause method to return true when given uri itself is a 
mount point + * @param cleanPath if the paths should be cleaned + * @return true if the given uri has a descendant which is a mount point [, or is a mount point] + */ + public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf, boolean cleanPath) + throws InvalidPathException { String path = uri.getPath(); try (LockResource r = new LockResource(mReadLock)) { @@ -364,7 +375,7 @@ public boolean containsMountPoint(AlluxioURI uri, boolean containsSelf) if (!containsSelf && mountPath.equals(path)) { continue; } - if (PathUtils.hasPrefix(mountPath, path)) { + if (PathUtils.hasPrefix(mountPath, path, cleanPath)) { return true; } } diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java index 2118fd68b04e..675fa80a8851 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeDirectory.java @@ -230,7 +230,7 @@ public static MutableInodeDirectory fromJournalEntry(InodeDirectoryEntry entry) */ public static MutableInodeDirectory create(long id, long parentId, String name, CreateDirectoryContext context) { - return new MutableInodeDirectory(id) + MutableInodeDirectory directory = new MutableInodeDirectory(id) .setParentId(parentId) .setName(name) .setTtl(context.getTtl()) @@ -243,6 +243,10 @@ public static MutableInodeDirectory create(long id, long parentId, String name, .setAcl(context.getDefaultAcl()) .setMountPoint(context.isMountPoint()) .setXAttr(context.getXAttr()); + if (context.getFingerprint() != null) { + directory.setUfsFingerprint(context.getFingerprint()); + } + return directory; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java index eb8e254b3c0b..82bbea15677e 100644 --- 
a/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/MutableInodeFile.java @@ -439,8 +439,8 @@ public static MutableInodeFile create(long blockContainerId, long parentId, Stri CreateFilePOptionsOrBuilder options = context.getOptions(); Preconditions.checkArgument( options.getReplicationMax() == Constants.REPLICATION_MAX_INFINITY - || options.getReplicationMax() >= options.getReplicationMin()); - return new MutableInodeFile(blockContainerId) + || options.getReplicationMax() >= options.getReplicationMin()); + MutableInodeFile inodeFile = new MutableInodeFile(blockContainerId) .setBlockSizeBytes(options.getBlockSizeBytes()) .setCreationTimeMs(creationTimeMs) .setName(name) @@ -462,6 +462,15 @@ public static MutableInodeFile create(long blockContainerId, long parentId, Stri ? Constants.NO_AUTO_PERSIST : System.currentTimeMillis() + options.getPersistenceWaitTime()) .setXAttr(context.getXAttr()); + if (context.getFingerprint() != null) { + inodeFile.setUfsFingerprint(context.getFingerprint()); + } + if (context.getCompleteFileInfo() != null) { + inodeFile.setBlockIds(context.getCompleteFileInfo().getBlockIds()); + inodeFile.setCompleted(true); + inodeFile.setLength(context.getCompleteFileInfo().getLength()); + } + return inodeFile; } @Override diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java index 0babb5c8d0c9..2e5d29362ea6 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/UfsSyncUtils.java @@ -13,6 +13,8 @@ import alluxio.underfs.Fingerprint; +import com.google.common.base.Preconditions; + import javax.annotation.concurrent.NotThreadSafe; /** @@ -37,6 +39,7 @@ public static SyncPlan computeSyncPlan(Inode inode, Fingerprint ufsFingerprint, // issues#15211: If 
Inodes store a Fingerprint proto instead of Strings, // we would save many String parsings here. Fingerprint inodeFingerprint = Fingerprint.parse(inode.getUfsFingerprint()); + Preconditions.checkState(inodeFingerprint != null, "Got invalid UFS fingerprint"); boolean isContentSynced = inodeUfsIsContentSynced(inode, inodeFingerprint, ufsFingerprint); boolean isMetadataSynced = inodeUfsIsMetadataSynced(inode, inodeFingerprint, ufsFingerprint); boolean ufsExists = ufsFingerprint.isValid(); diff --git a/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java b/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java index 191e90da720c..37795632c962 100644 --- a/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java +++ b/core/server/master/src/main/java/alluxio/master/metastore/ReadOnlyInodeStore.java @@ -11,15 +11,24 @@ package alluxio.master.metastore; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.file.options.DescendantType; import alluxio.master.file.meta.EdgeEntry; import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeDirectoryView; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; import alluxio.master.file.meta.MutableInode; import alluxio.resource.CloseableIterator; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import java.io.Closeable; +import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.Optional; @@ -182,6 +191,122 @@ default CloseableIterator getChildren(InodeDirectoryView inode) return getChildren(inode.getId(), ReadOption.defaults()); } + /** + * Creates an iterator starting from the path, and including its + * 
children. + * @param option the read option + * @param descendantType the type of descendants to load + * @param includeBaseInode if the iterator should include the inode from the base path + * @param lockedPath the locked path to the root inode + * @return a skippable iterator that supports to skip children during the iteration + */ + default SkippableInodeIterator getSkippableChildrenIterator( + ReadOption option, DescendantType descendantType, boolean includeBaseInode, + LockedInodePath lockedPath) { + Inode inode; + try { + inode = lockedPath.getInode(); + } catch (FileDoesNotExistException e) { + return new SkippableInodeIterator() { + @Override + public void skipChildrenOfTheCurrent() { + } + + @Override + public void close() { + } + + @Override + public boolean hasNext() { + return false; + } + + @Override + public InodeIterationResult next() { + throw new NoSuchElementException(); + } + }; + } + if (descendantType == DescendantType.ALL) { + return new RecursiveInodeIterator(this, inode, includeBaseInode, option, lockedPath); + } else if (descendantType == DescendantType.NONE) { + Preconditions.checkState(includeBaseInode); + // if descendant type is none, we should only return the parent node + return new SkippableInodeIterator() { + InodeIterationResult mFirst = new InodeIterationResult(inode, lockedPath); + @Override + public void close() { + } + + @Override + public void skipChildrenOfTheCurrent() { + } + + @Override + public boolean hasNext() { + return mFirst != null; + } + + @Override + public InodeIterationResult next() { + if (mFirst == null) { + throw new NoSuchElementException(); + } + InodeIterationResult ret = mFirst; + mFirst = null; + return ret; + } + }; + } + + final CloseableIterator iterator = getChildren(inode.getId(), option); + return new SkippableInodeIterator() { + + LockedInodePath mPreviousPath = null; + final LockedInodePath mRootPath = lockedPath; + Inode mFirst = includeBaseInode ? 
inode : null; + + @Override + public void skipChildrenOfTheCurrent() { + // No-op + } + + @Override + public boolean hasNext() { + return mFirst != null || iterator.hasNext(); + } + + @Override + public InodeIterationResult next() { + if (mFirst != null) { + Inode ret = mFirst; + mFirst = null; + return new InodeIterationResult(ret, lockedPath); + } + if (mPreviousPath != null) { + mPreviousPath.close(); + } + Inode inode = iterator.next(); + + try { + mPreviousPath = mRootPath.lockChild(inode, InodeTree.LockPattern.WRITE_EDGE, false); + } catch (InvalidPathException e) { + // Should not reach here since the path should be valid + throw new InternalRuntimeException(e); + } + return new InodeIterationResult(inode, mPreviousPath); + } + + @Override + public void close() throws IOException { + iterator.close(); + if (mPreviousPath != null) { + mPreviousPath.close(); + } + } + }; + } + /** * @param inodeId an inode id * @param name an inode name diff --git a/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java b/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java new file mode 100644 index 000000000000..76d7cfa78a97 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/RecursiveInodeIterator.java @@ -0,0 +1,233 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.metastore; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InternalRuntimeException; +import alluxio.master.file.meta.Inode; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.resource.CloseableIterator; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Stack; +import java.util.function.Function; +import javax.annotation.Nullable; + +/** + * A recursive inode iterator that supports to skip children inodes during iteration. + */ +public class RecursiveInodeIterator implements SkippableInodeIterator { + private static final Logger LOG = LoggerFactory.getLogger(RecursiveInodeIterator.class); + + private final Stack, LockedInodePath>> + mIteratorStack = new Stack<>(); + private final ReadOnlyInodeStore mInodeStore; + private boolean mHasNextCalled = false; + private boolean mHasNext; + private final List mNameComponents = new ArrayList<>(); + private final List mStartAfterPathComponents; + private LockedInodePath mLastLockedPath = null; + private Inode mFirst; + private final LockedInodePath mRootPath; + private boolean mCurrentInodeDirectory; + + /** + * Constructs an instance. + * + * @param inodeStore the inode store + * @param inode the root inode + * @param includeBaseInode if the inode of the base path should be included + * @param readOption the read option + * @param lockedPath the locked path to the root inode + */ + public RecursiveInodeIterator( + ReadOnlyInodeStore inodeStore, + Inode inode, + boolean includeBaseInode, + ReadOption readOption, + LockedInodePath lockedPath + ) { + mFirst = includeBaseInode ? 
inode : null; + mRootPath = lockedPath; + String startFrom = readOption.getStartFrom(); + if (startFrom == null) { + mStartAfterPathComponents = Collections.emptyList(); + } else { + try { + startFrom = readOption.getStartFrom().startsWith(AlluxioURI.SEPARATOR) + ? readOption.getStartFrom().substring(1) : readOption.getStartFrom(); + mStartAfterPathComponents = Arrays.asList(startFrom + .split(AlluxioURI.SEPARATOR)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + ReadOption firstReadOption; + if (mStartAfterPathComponents.size() > 0) { + firstReadOption = + ReadOption.newBuilder().setReadFrom(mStartAfterPathComponents.get(0)).build(); + } else { + firstReadOption = ReadOption.defaults(); + } + mIteratorStack.push(new Pair<>(inodeStore.getChildren( + inode.getId(), firstReadOption), lockedPath)); + mInodeStore = inodeStore; + } + + // The locked inode path will become stale after skipChildrenOfTheCurrent() is called. + @Override + public void skipChildrenOfTheCurrent() { + if (mHasNextCalled) { + throw new IllegalStateException("Cannot call hasNext"); + } + if (!mCurrentInodeDirectory) { + // If the current inode is a file, then this is just a no-op. 
+ return; + } + popStack(); + if (mNameComponents.size() > 0) { + mNameComponents.remove(mNameComponents.size() - 1); + } + } + + private void popStack() { + Pair, LockedInodePath> item = mIteratorStack.pop(); + item.getFirst().close(); + if (!mIteratorStack.isEmpty()) { + item.getSecond().close(); + } + } + + @Override + public boolean hasNext() { + if (mFirst != null) { + return true; + } + if (mHasNextCalled) { + return mHasNext; + } + while (!mIteratorStack.isEmpty() && !tryOnIterator( + mIteratorStack.peek().getFirst(), CloseableIterator::hasNext + )) { + popStack(); + // When the iteration finishes, the size of mPathComponents is 0 + if (mNameComponents.size() > 0) { + mNameComponents.remove(mNameComponents.size() - 1); + } + } + mHasNextCalled = true; + mHasNext = !mIteratorStack.isEmpty(); + return mHasNext; + } + + @Override + public InodeIterationResult next() { + if (!hasNext()) { + throw new InternalRuntimeException("Called next on a completed iterator"); + } + if (mFirst != null) { + Inode ret = mFirst; + mFirst = null; + mCurrentInodeDirectory = ret.isDirectory(); + return new InodeIterationResult(ret, mRootPath); + } + Pair, LockedInodePath> top = mIteratorStack.peek(); + try { + top.getSecond().traverse(); + } catch (InvalidPathException e) { + // should not reach here as the path is valid + throw new InternalRuntimeException(e); + } + if (mLastLockedPath != null) { + mLastLockedPath.close(); + mLastLockedPath = null; + } else { + if (top.getSecond().getLockPattern() != InodeTree.LockPattern.READ) { + // after the parent has been returned, we can downgrade it to a read lock + top.getSecond().downgradeToRead(); + } + } + Inode current = tryOnIterator(top.getFirst(), CloseableIterator::next); + LockedInodePath lockedPath; + try { + lockedPath = top.getSecond().lockChild(current, InodeTree.LockPattern.WRITE_EDGE, false); + } catch (InvalidPathException e) { + // should not reach here as the path is valid + throw new InternalRuntimeException(e); + } + 
if (current.isDirectory()) { + ReadOption readOption = ReadOption.newBuilder() + .setReadFrom(populateStartAfter(current.getName())).build(); + CloseableIterator nextLevelIterator = + mInodeStore.getChildren(current.getId(), readOption); + mIteratorStack.push(new Pair<>(nextLevelIterator, lockedPath)); + mNameComponents.add(current.getName()); + } else { + mLastLockedPath = lockedPath; + } + mHasNextCalled = false; + mCurrentInodeDirectory = current.isDirectory(); + return new InodeIterationResult(current, lockedPath); + } + + /** + * @param currentInodeName the current inode name + * @return the startAfter string that are used when getChildren is called + */ + private @Nullable String populateStartAfter(String currentInodeName) { + if (mNameComponents.size() + 1 >= mStartAfterPathComponents.size()) { + return null; + } + for (int i = 0; i < mNameComponents.size(); ++i) { + if (!mNameComponents.get(i).equals(mStartAfterPathComponents.get(i))) { + return null; + } + } + if (!currentInodeName.equals(mStartAfterPathComponents.get(mNameComponents.size()))) { + return null; + } + return mStartAfterPathComponents.get(mNameComponents.size() + 1); + } + + private T tryOnIterator( + CloseableIterator iterator, + Function, T> supplier) { + try { + return supplier.apply(iterator); + } catch (Exception e) { + iterator.close(); + throw e; + } + } + + @Override + public void close() throws IOException { + if (mLastLockedPath != null) { + mLastLockedPath.close(); + mLastLockedPath = null; + } + while (!mIteratorStack.isEmpty()) { + popStack(); + } + } +} diff --git a/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java b/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java new file mode 100644 index 000000000000..d5d14215e0b1 --- /dev/null +++ b/core/server/master/src/main/java/alluxio/master/metastore/SkippableInodeIterator.java @@ -0,0 +1,30 @@ +/* + * The Alluxio Open Foundation licenses this work under the 
Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import alluxio.master.file.meta.InodeIterationResult; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * Iterator over inodes that allows to skip a directory when iterating. + */ +public interface SkippableInodeIterator + extends Iterator, Closeable { + /** + * Skip the children of the current inode during the iteration. + */ + default void skipChildrenOfTheCurrent() { + throw new UnsupportedOperationException("Operation not supported"); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java index b54ce176585b..dcde599c818c 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterS3UfsTest.java @@ -11,28 +11,30 @@ package alluxio.master.file; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import alluxio.AlluxioURI; -import alluxio.client.WriteType; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.FileAlreadyCompletedException; import alluxio.exception.FileAlreadyExistsException; import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidFileSizeException; import 
alluxio.exception.InvalidPathException; -import alluxio.master.file.contexts.CreateDirectoryContext; import alluxio.master.file.contexts.ExistsContext; import alluxio.master.file.contexts.MountContext; import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import io.findify.s3mock.S3Mock; +import org.gaul.s3proxy.junit.S3ProxyRule; +import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,44 +53,45 @@ public final class FileSystemMasterS3UfsTest extends FileSystemMasterTestBase { private static final AlluxioURI UFS_ROOT = new AlluxioURI("s3://test-bucket/"); private static final AlluxioURI MOUNT_POINT = new AlluxioURI("/s3_mount"); private AmazonS3 mS3Client; - private S3Mock mS3MockServer; + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + .withPort(8001) + .withCredentials("_", "_") + .build(); @Override public void before() throws Exception { - mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); - mS3MockServer.start(); - Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT, "localhost:8001"); Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2"); Configuration.set(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true); - Configuration.set(PropertyKey.S3A_ACCESS_KEY, "_"); - Configuration.set(PropertyKey.S3A_SECRET_KEY, "_"); + Configuration.set(PropertyKey.S3A_ACCESS_KEY, mS3Proxy.getAccessKey()); + Configuration.set(PropertyKey.S3A_SECRET_KEY, mS3Proxy.getSecretKey()); - AwsClientBuilder.EndpointConfiguration - endpoint = new AwsClientBuilder.EndpointConfiguration( - "http://localhost:8001", "us-west-2"); mS3Client = AmazonS3ClientBuilder 
.standard() .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(endpoint) - .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + Regions.US_WEST_2.getName())) .build(); mS3Client.createBucket(TEST_BUCKET); super.before(); } + @Ignore @Test public void basicWrite() throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, - IOException, InvalidPathException { - mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); - mFileSystemMaster.createDirectory( - MOUNT_POINT.join(TEST_DIRECTORY), - CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH) - ); - assertEquals(1, mS3Client.listObjects(TEST_BUCKET).getObjectSummaries().size()); - assertNotNull(mS3Client.getObject(TEST_BUCKET, TEST_DIRECTORY + "/")); + IOException, InvalidPathException, BlockInfoException, InvalidFileSizeException, + FileAlreadyCompletedException { + // Not testable: + // when you create a directory, there's nothing created correspondingly in S3 + // when you create a file, you need to open it on the client side to write the content, + // which is out of the scope of this testing. 
} @Test @@ -103,13 +106,6 @@ public void basicSync() @Override public void after() throws Exception { mS3Client = null; - try { - if (mS3MockServer != null) { - mS3MockServer.shutdown(); - } - } finally { - mS3MockServer = null; - } super.after(); } } diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java index 0e90d2f796c1..cfa8b17daad8 100644 --- a/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMasterTestBase.java @@ -43,8 +43,12 @@ import alluxio.master.file.contexts.GetStatusContext; import alluxio.master.file.contexts.ListStatusContext; import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.TestSyncProcessor; import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.MountTable; import alluxio.master.file.meta.TtlIntervalRule; +import alluxio.master.file.meta.UfsSyncPathCache; import alluxio.master.journal.JournalSystem; import alluxio.master.journal.JournalTestUtils; import alluxio.master.journal.JournalType; @@ -363,7 +367,15 @@ void startServices() throws Exception { mExecutorService = Executors .newFixedThreadPool(4, ThreadFactoryUtils.build("DefaultFileSystemMasterTest-%d", true)); mFileSystemMaster = new DefaultFileSystemMaster(mBlockMaster, masterContext, - ExecutorServiceFactories.constantExecutorServiceFactory(mExecutorService), mClock); + ExecutorServiceFactories.constantExecutorServiceFactory(mExecutorService), mClock) { + @Override + protected DefaultSyncProcess createSyncProcess( + ReadOnlyInodeStore inodeStore, MountTable mountTable, InodeTree inodeTree, + UfsSyncPathCache syncPathCache) { + return new TestSyncProcessor( + this, inodeStore, mountTable, inodeTree, syncPathCache, getAbsentPathCache()); + } + }; 
mInodeStore = mFileSystemMaster.getInodeStore(); mInodeTree = mFileSystemMaster.getInodeTree(); mRegistry.add(FileSystemMaster.class, mFileSystemMaster); @@ -382,7 +394,7 @@ void startServices() throws Exception { Constants.MEDIUM_SSD, (long) Constants.KB), ImmutableMap.of(), new HashMap(), RegisterWorkerPOptions.getDefaultInstance()); mWorkerId2 = mBlockMaster.getWorkerId( - new WorkerNetAddress().setHost("remote").setRpcPort(80).setDataPort(81).setWebPort(82)); + new WorkerNetAddress().setHost("localhost").setRpcPort(83).setDataPort(84).setWebPort(85)); mBlockMaster.workerRegister(mWorkerId2, Arrays.asList(Constants.MEDIUM_MEM, Constants.MEDIUM_SSD), ImmutableMap.of(Constants.MEDIUM_MEM, (long) Constants.MB, diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java new file mode 100644 index 000000000000..50de76fe9699 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2BenchmarkTest.java @@ -0,0 +1,131 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import alluxio.AlluxioURI; +import alluxio.exception.AccessControlException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadMetadataPType; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.util.CommonUtils; + +import org.apache.commons.io.FileUtils; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; + +/** + * This class is to test the load metadata performance against a local UFS. + * use {@link FileSystemMetadataSyncV2BenchmarkTest#generateTestFiles()} to generate test files + * first, then run the v1 or v2 sync respectively. + * This class is for debugging and should not be run as a unit test. 
+ */ +@Ignore +public final class FileSystemMetadataSyncV2BenchmarkTest extends FileSystemMasterTestBase { + private static final Logger LOG = + LoggerFactory.getLogger(FileSystemMetadataSyncV2BenchmarkTest.class); + private static final String LOCAL_FS_ABSOLUTE_PATH = "/tmp/s3-test-files/bucket"; + private static final String SUB_DIR = "/0/0/0/0"; + private static final AlluxioURI UFS_ROOT = new AlluxioURI( + "file://" + LOCAL_FS_ABSOLUTE_PATH + SUB_DIR); + private static final AlluxioURI MOUNT_POINT = new AlluxioURI("/local_mount"); + + @Override + public void before() throws Exception { + super.before(); + } + + @Test + public void syncV2() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // Sync one file from UFS + // First pass + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, DirectoryLoadType.BFS, 0 + ).getBaseTask(); + result.waitComplete(0); + System.out.println(result.getTaskInfo().getStats()); + + System.out.println("--------Second pass----------"); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, DirectoryLoadType.BFS, 0 + ).getBaseTask(); + result.waitComplete(0); + System.out.println(result.getTaskInfo().getStats()); + } + + @Test + public void syncV1() + throws FileDoesNotExistException, FileAlreadyExistsException, AccessControlException, + IOException, InvalidPathException { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // Sync one file from UFS + long start = CommonUtils.getCurrentMs(); + mFileSystemMaster.listStatus(MOUNT_POINT, listSync(true)); + System.out.println("Time elapsed " + (CommonUtils.getCurrentMs() - start) + "ms"); + } + + @Ignore + @Test + public void generateTestFiles() throws IOException { + int count = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 2; ++l) { + for (int n = 0; n 
< 10; ++n) { + for (int m = 0; m < 10000; ++m) { + count++; + if (count % 10000 == 0) { + System.out.println(count); + } + String fileData = "f"; + FileOutputStream fos = + FileUtils.openOutputStream(new File( + String.format( + "%s/%d/%d/%d/%d/%d/f%d", LOCAL_FS_ABSOLUTE_PATH, i, j, k, l, n, m))); + fos.write(fileData.getBytes()); + fos.flush(); + fos.close(); + } + } + } + } + } + } + } + + private ListStatusContext listSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.ALWAYS) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build() + )); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java new file mode 100644 index 000000000000..76a1d3499d51 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/FileSystemMetadataSyncV2Test.java @@ -0,0 +1,1375 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.concurrent.jsr.CompletableFuture; +import alluxio.exception.InvalidPathException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateFilePOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.grpc.WritePType; +import alluxio.master.file.contexts.CompleteFileContext; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.DefaultSyncProcess; +import alluxio.master.file.mdsync.SyncFailReason; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskStats; +import alluxio.master.file.mdsync.TestSyncProcessor; +import alluxio.master.file.meta.MountTable; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.underfs.UfsStatus; +import alluxio.underfs.UnderFileSystem; +import alluxio.util.CommonUtils; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.SynchronousQueue; +import 
java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +/** + * Unit tests for {@link FileSystemMaster}. + */ +@RunWith(Parameterized.class) +public class FileSystemMetadataSyncV2Test extends MetadataSyncV2TestBase { + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + public FileSystemMetadataSyncV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void asyncListingOperations() throws Exception { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // with depth none only include the path itself + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/"), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.NONE) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + // depth one will have the file and nested directory + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/", + TEST_DIRECTORY + "/" + TEST_FILE), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.ONE) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + // depth all will only have the files + assertEquals(ImmutableList.of(TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/" + TEST_FILE, + TEST_DIRECTORY + "/" + TEST_FILE), + listAsync(MOUNT_POINT.join(TEST_DIRECTORY), DescendantType.ALL) + .getItems().map(UfsStatus::getName).collect(Collectors.toList())); + } + + UfsLoadResult listAsync(AlluxioURI alluxioPath, DescendantType descendantType) throws Exception { + MountTable.Resolution resolution = 
mFileSystemMaster.getMountTable().resolve(alluxioPath); + try (CloseableResource ufsClient = + Objects.requireNonNull(mFileSystemMaster.getMountTable() + .getUfsClient(resolution.getMountId())).acquireUfsResource()) { + UfsClient cli = ufsClient.get(); + SynchronousQueue result = new SynchronousQueue<>(); + cli.performListingAsync(resolution.getUri().getPath(), null, null, descendantType, true, + ufsResult -> { + try { + result.put(ufsResult); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }, t -> { + try { + result.put(t); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + return (UfsLoadResult) result.take(); + } + } + + @Test + public void syncDirDepth() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // Sync with depth 1, should see the file + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = 
mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncNonPersistedNested() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // make a non persisted file in the nested path + AlluxioURI nestedPath = MOUNT_POINT.join(TEST_DIRECTORY); + for (int i = 0; i < 3; i++) { + nestedPath = nestedPath.join(TEST_DIRECTORY); + mFileSystemMaster.createDirectory(nestedPath, CreateDirectoryContext.defaults()); + } + mFileSystemMaster.createFile(nestedPath.join("file1"), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(nestedPath.join("file1"), + CompleteFileContext.defaults()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType == DirectoryLoadType.SINGLE_LISTING ? 
1L : 2L, + SyncOperation.SKIPPED_NON_PERSISTED, 4L // the nested file and its parents + )); + assertTrue(mFileSystemMaster.exists(nestedPath.join("file1"), ExistsContext.defaults())); + + // delete the object and sync again + mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L, + SyncOperation.SKIPPED_NON_PERSISTED, 5L // the nested file and its parents + )); + assertTrue(mFileSystemMaster.exists(nestedPath.join("file1"), ExistsContext.defaults())); + } + + @Test + public void syncNonPersistedExists() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + // Sync the file + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + AlluxioURI filePath = MOUNT_POINT.join(TEST_FILE); + // recreate the file, but put it in alluxio only + mFileSystemMaster.delete(filePath, DeleteContext.mergeFrom( + DeletePOptions.newBuilder().setAlluxioOnly(true))); + mFileSystemMaster.createFile(filePath, + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(filePath, CompleteFileContext.defaults()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + 
SyncOperation.SKIPPED_NON_PERSISTED, 1L + )); + } + + @Test + public void syncNonPersisted() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + + // make a non-complete file in the mount path + mFileSystemMaster.createFile(MOUNT_POINT.join("file1"), + CreateFileContext.defaults()); + // make a non persisted file in the nested path + mFileSystemMaster.createFile(MOUNT_POINT.join(TEST_DIRECTORY).join("file1"), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + mFileSystemMaster.completeFile(MOUNT_POINT.join(TEST_DIRECTORY).join("file1"), + CompleteFileContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.SKIPPED_NON_PERSISTED, 1L, + SyncOperation.NOOP, 1L + )); + + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + .join("file1"), ExistsContext.defaults())); + + // delete all objects on the UFS + for (int i = 0; i < 10; i++) { + mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i); + } + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + 
assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 10L, + SyncOperation.SKIPPED_NON_PERSISTED, 3L // includes the skipped directory + )); + + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + .join("file1"), ExistsContext.defaults())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.SKIPPED_NON_PERSISTED, 3L // includes the skipped directory + )); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("file1"), ExistsContext.defaults())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join(TEST_DIRECTORY) + .join("file1"), ExistsContext.defaults())); + } + + @Test + public void basicSyncMultiRequest() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_FILE + i, TEST_CONTENT); + } + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + 
result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 11L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + } + + @Test + public void dirTest() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // load the dir with depth 1 + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + List items = mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(true)); + assertEquals(1, items.size()); + } + + @Test + public void basicSync() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, + "", mFileSystemMaster, mClient); + } + + @Test + 
public void testUpdateDirectChildrenLoaded() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/foo", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/foo", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/d4/foo", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("d3"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d3"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d3/d4"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d1"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d2"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + 
result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d1"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT.join("d2"), getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void basicSyncNestedMount() throws Throwable { + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/", ""); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT.join(TEST_DIRECTORY), MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedMountNestedDir() throws Throwable { + mS3Client.putObject(TEST_BUCKET, + TEST_DIRECTORY + "/", ""); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT.join(TEST_DIRECTORY), MountContext.defaults()); + // create 
files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + // create nested files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + + TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 21L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType == DirectoryLoadType.SINGLE_LISTING ? 
20L : 21L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, TEST_DIRECTORY, mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedMountNestedDirWithMarkers() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + // create directory markers + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/", ""); + // create files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + // create nested files + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + + TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 22L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 22L + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncEmptyDirWithMarkers() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + // 
create directory marker + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.NOOP, 0L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncNestedFile() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + 
result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, + mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING ? 11L : 10L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void basicSyncDirectory() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + for (int i = 0; i < 10; i++) { + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE + i, TEST_CONTENT); + } + + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 11L + )); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 10L + )); + } + + @Test + public void syncInodeHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + 
.get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + FileInfo info = mFileSystemMaster.getFileInfo(MOUNT_POINT.join(TEST_FILE), getNoSync()); + assertFalse(info.isFolder()); + assertTrue(info.isCompleted()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Delete the file from UFS, then sync again + mS3Client.deleteObject(TEST_BUCKET, TEST_FILE); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncInodeDescendantTypeNoneHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, 
getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_FILE), DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + FileInfo info = mFileSystemMaster.getFileInfo(MOUNT_POINT.join(TEST_FILE), getNoSync()); + assertFalse(info.isFolder()); + assertTrue(info.isCompleted()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void deleteOneAndAddAnother() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "foo/a", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "foo/c", TEST_CONTENT); + + // Sync two files from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("foo"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 3L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + // Delete one and create another + mS3Client.deleteObject(TEST_BUCKET, "foo/a"); + mS3Client.putObject(TEST_BUCKET, "foo/b", TEST_CONTENT); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("foo"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.DELETE, 1L, + SyncOperation.NOOP, 1L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, 
mClient); + } + + @Test + public void deleteDirectory() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 5L + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + mS3Client.deleteObject(TEST_BUCKET, "d1/f1"); + mS3Client.deleteObject(TEST_BUCKET, "d1/f2"); + mS3Client.putObject(TEST_BUCKET, "d0/f1", TEST_CONTENT); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + // "d2/f1" + long noopCount = 1; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // "d2" + noopCount++; + } + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.DELETE, 3L, + SyncOperation.NOOP, noopCount + )); + + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncInodeHappyPathNestedObjects() throws Throwable { + mS3Client.putObject(TEST_BUCKET, "d1/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d3/2", TEST_CONTENT); + 
mS3Client.putObject(TEST_BUCKET, "d3/3", TEST_CONTENT); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // count the files + long numInodes = 9; + // count the directories + numInodes += 3; + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, numInodes + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + // count the files + long noopCount = 9; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // count the directories + noopCount += 3; + } + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, noopCount + )); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncNestedObjectsCreateThenDelete() throws Throwable { + mS3Client.putObject(TEST_BUCKET, "d/1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/3", TEST_CONTENT); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + // count the files + long numInodes = 3; + // count the directories + numInodes += 1; + + // Sync one file from UFS + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + 
assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, numInodes + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + mS3Client.deleteObject(TEST_BUCKET, "d/1"); + mS3Client.deleteObject(TEST_BUCKET, "d/2"); + mS3Client.deleteObject(TEST_BUCKET, "d/3"); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 4L + )); + } + + @Test + public void syncInodeUfsDown() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + stopS3Server(); + final BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(IOException.class, () -> { + result.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result.getTaskInfo(), SyncFailReason.LOADING_UFS_IO_FAILURE); + + assertFalse(mFileSystemMaster.getInodeStore() + .get(mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId()) + .get().asDirectory().isDirectChildrenLoaded()); + + startS3Server(); + } + + @Test + public void syncInodeProcessingErrorHandling() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + syncer.beforePerformSyncOne((ignored) -> { + throw new Exception("fail"); + }); + final BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + 
MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(Exception.class, () -> { + result.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result.getTaskInfo(), SyncFailReason.PROCESSING_UNKNOWN); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + syncer.beforePerformSyncOne((context) -> { + Exception e = new Exception("fail"); + context.reportSyncFailReason(SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC, e); + throw e; + }); + final BaseTask result2 = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + assertThrows(Exception.class, () -> { + result2.waitComplete(TIMEOUT_MS); + }); + assertSyncFailureReason(result2.getTaskInfo(), + SyncFailReason.PROCESSING_CONCURRENT_UPDATE_DURING_SYNC); + } + + @Test + public void syncDirectoryHappyPath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "file1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "file2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "file3", TEST_CONTENT); + + // To recreate -> content hashes are different + mFileSystemMaster.createFile(MOUNT_POINT.join("file1"), CreateFileContext.mergeFrom( + CreateFilePOptions.newBuilder().setWriteType(WritePType.THROUGH))); + mFileSystemMaster.completeFile(MOUNT_POINT.join("file1"), CompleteFileContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "file1", TEST_CONTENT + "diff"); + + // To delete -> doesn't exist in UFS + mFileSystemMaster.createDirectory(MOUNT_POINT.join("directory1"), + CreateDirectoryContext.defaults()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + 
result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // file2 & file 3 + SyncOperation.CREATE, 2L, + // directory1 + SyncOperation.DELETE, 1L, + // file1 + SyncOperation.RECREATE, 1L + )); + } + + @Test + public void syncDirectoryTestUFSIteration() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 100; ++i) { + mS3Client.putObject(TEST_BUCKET, "file" + i, ""); + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 100L + )); + } + + @Test + public void syncDirectoryTestUFSIterationRecursive() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + int filePerDirectory = 5; + // count the files + int createdInodeCount = filePerDirectory * filePerDirectory * filePerDirectory; + // count the directories + createdInodeCount += filePerDirectory * filePerDirectory + filePerDirectory; + + for (int i = 0; i < filePerDirectory; ++i) { + for (int j = 0; j < filePerDirectory; ++j) { + for (int k = 0; k < filePerDirectory; ++k) { + mS3Client.putObject(TEST_BUCKET, 
String.format("%d/%d/%d", i, j, k), ""); + } + } + } + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, (long) createdInodeCount + )); + + // count the files + int noopInodeCount = filePerDirectory * filePerDirectory * filePerDirectory; + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // count the directories + noopInodeCount += filePerDirectory * filePerDirectory + filePerDirectory; + } + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + // All created node were not changed. 
+ assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, (long) noopInodeCount + )); + } + + @Test + public void syncNonS3DirectoryDelete() + throws Throwable { + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + + // Create a directory not on local ufs + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory"), + CreateDirectoryContext.defaults()); + mFileSystemMaster.createDirectory(new AlluxioURI("/test_directory/sub_directory"), + CreateDirectoryContext.defaults()); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.NONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 2L + )); + } + + @Test + 
public void testS3Fingerprint() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f3", TEST_CONTENT); + + // Sync to load metadata + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 3L + )); + + mS3Client.putObject(TEST_BUCKET, "f1", ""); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // f1, f3 + SyncOperation.NOOP, 2L, + // f2 + SyncOperation.RECREATE, 1L + )); + } + + @Test + public void syncNoneOnMountPoint1() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + )); + 
} + + @Test + public void syncNoneOnMountPoint2() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d2/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.NONE, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + )); + } + + @Test + public void syncUfsNotFound() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("/non_existing_path"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(mFileSystemMaster.getAbsentPathCache().isAbsentSince( + new AlluxioURI("/non_existing_path"), 0)); + } + + @Test + public void unmountDuringSync() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + for (int i = 0; i < 100; ++i) { + mS3Client.putObject(TEST_BUCKET, "file" + i, ""); + } + + BaseTask baseTask = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask(); + + AtomicBoolean unmount = new AtomicBoolean(false); + syncer.blockUntilNthSyncThenDo(50, () -> unmount.set(true)); + CompletableFuture unmountFuture = CompletableFuture.supplyAsync(() -> { + try { + while (!unmount.get()) { + CommonUtils.sleepMs(1); + } + mFileSystemMaster.unmount(MOUNT_POINT); + return null; + } catch (Throwable e) { + throw new RuntimeException(e); + } + }); + + unmountFuture.get(); + 
assertThrows(DefaultSyncProcess.MountPointNotFoundRuntimeException.class, + () -> baseTask.waitComplete(TIMEOUT_MS)); + + assertFalse(baseTask.succeeded()); + assertFalse(mFileSystemMaster.exists(MOUNT_POINT, existsNoSync())); + + Map syncFailures = + baseTask.getTaskInfo().getStats().getSyncFailReasons(); + Set + reasons = syncFailures.values().stream().map(TaskStats.SyncFailure::getSyncFailReason) + .collect(Collectors.toSet()); + assertTrue(reasons.contains(SyncFailReason.PROCESSING_MOUNT_POINT_DOES_NOT_EXIST) + || reasons.contains(SyncFailReason.LOADING_MOUNT_POINT_DOES_NOT_EXIST)); + } + + @Test + public void concurrentDelete() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + // Create a directory not on s3 ufs + mFileSystemMaster.createDirectory(MOUNT_POINT.join("/d"), + CreateDirectoryContext.defaults().setWriteType(WriteType.MUST_CACHE)); + // Create something else into s3 + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + AtomicReference baseTask = new AtomicReference<>(); + CompletableFuture syncFuture = CompletableFuture.supplyAsync(() -> { + try { + baseTask.set(mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask()); + baseTask.get().waitComplete(TIMEOUT_MS); + return null; + } catch (Throwable t) { + throw new RuntimeException(t); + } + }); + + // blocks on the sync of "/d" (the 1st sync target) + syncer.blockUntilNthSyncThenDo(1, () -> { + mFileSystemMaster.delete(MOUNT_POINT.join("/d"), DeleteContext.create( + DeletePOptions.newBuilder().setAlluxioOnly(true))); + }); + syncFuture.get(); + assertTrue(baseTask.get().succeeded()); + checkUfsMatches(MOUNT_POINT, TEST_BUCKET, "", mFileSystemMaster, mClient); + assertSyncOperations(baseTask.get().getTaskInfo(), ImmutableMap.of( + // /test_file + SyncOperation.CREATE, 1L, + // /d + 
SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION, 1L + )); + } + + @Test + public void concurrentCreate() throws Exception { + TestSyncProcessor syncer = (TestSyncProcessor) mFileSystemMaster.getMetadataSyncer(); + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + // Create the test file into s3 + mS3Client.putObject(TEST_BUCKET, TEST_FILE, TEST_CONTENT); + + AtomicReference baseTask = new AtomicReference<>(); + CompletableFuture syncFuture = CompletableFuture.supplyAsync(() -> { + try { + baseTask.set(mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0).getBaseTask()); + baseTask.get().waitComplete(TIMEOUT_MS); + return null; + } catch (Throwable t) { + throw new RuntimeException(t); + } + }); + + // blocks on the sync of "/test_file" (the 1st sync target) + syncer.blockUntilNthSyncThenDo(1, () -> { + mFileSystemMaster.createFile( + MOUNT_POINT.join(TEST_FILE), + CreateFileContext.defaults().setWriteType(WriteType.MUST_CACHE)); + }); + syncFuture.get(); + assertTrue(baseTask.get().succeeded()); + assertSyncOperations(baseTask.get().getTaskInfo(), ImmutableMap.of( + // /test_file + SyncOperation.SKIPPED_DUE_TO_CONCURRENT_MODIFICATION, 1L + )); + } + + @Test + public void startAfter() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "f3", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f3", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(0, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f2", false) + 
.getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(1, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f1", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(2, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ONE, mDirectoryLoadType, 0, "f0", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, DescendantType.ALL, mDirectoryLoadType, 0, null, false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT, listNoSync(false)).size()); + } + + @Test + public void startAfterAbsolutePath() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "root/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f3", TEST_CONTENT); + // The S3 mock server has a bug where 403 is returned if startAfter exceeds the last + // object key. 
+ assertThrows(InvalidPathException.class, () -> { + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, + 0, "/random/path", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + }); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, 0, + "/s3_mount/root/f2", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(1, mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), + listNoSync(false)).size()); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ONE, mDirectoryLoadType, 0, + "/s3_mount/root", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertEquals(3, mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), + listNoSync(false)).size()); + } + + @Test + public void startAfterRecursive() throws Throwable { + if (mDirectoryLoadType != DirectoryLoadType.SINGLE_LISTING) { + // NOT SUPPORTED + return; + } + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "root/d1/d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d1/f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d2/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/d2/f3", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d1/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/d2/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "root/f1", TEST_CONTENT); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join("root"), DescendantType.ALL, mDirectoryLoadType, 0, "d1/d2/f2", false) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + /* + (under "/s3_mount/root") + /d1 + /d2 + /f3 + /f1 + /d2 + /d1 
+ /f1 + */ + assertEquals(7, + mFileSystemMaster.listStatus(MOUNT_POINT.join("root"), listNoSync(true)).size()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java new file mode 100644 index 000000000000..a528cd332f48 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncDepthV2Test.java @@ -0,0 +1,204 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.SyncOperation; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; + +@RunWith(Parameterized.class) +public class MetadataSyncDepthV2Test extends MetadataSyncV2TestBase { + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][]{ + {DirectoryLoadType.SINGLE_LISTING, DescendantType.ALL}, + {DirectoryLoadType.BFS, DescendantType.ALL}, + {DirectoryLoadType.DFS, DescendantType.ALL}, + {DirectoryLoadType.SINGLE_LISTING, 
DescendantType.ONE}, + {DirectoryLoadType.BFS, DescendantType.ONE}, + {DirectoryLoadType.DFS, DescendantType.ONE}, + {DirectoryLoadType.SINGLE_LISTING, DescendantType.NONE}, + {DirectoryLoadType.BFS, DescendantType.NONE}, + {DirectoryLoadType.DFS, DescendantType.NONE}, + }); + } + + DescendantType mDescendantType; + + public MetadataSyncDepthV2Test( + DirectoryLoadType directoryLoadType, DescendantType descendantType) { + mDescendantType = descendantType; + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void syncSingleDir() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/", ""); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + } + + @Test + public void syncSingleDirNested() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + String dirPath = TEST_DIRECTORY + "/" + TEST_DIRECTORY + "/"; + mS3Client.putObject(TEST_BUCKET, dirPath, ""); + + // Sync the dir + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY).join(TEST_DIRECTORY); + BaseTask result = 
mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // Delete the dir + mS3Client.deleteObject(TEST_BUCKET, dirPath); + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + + // The parent should also be gone + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT.join(TEST_DIRECTORY), mDescendantType, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Sync the root, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of()); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + checkUfsMatches(MOUNT_POINT, 
TEST_BUCKET, "", mFileSystemMaster, mClient); + } + + @Test + public void syncSingleFile() throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT); + + // Sync the file + AlluxioURI syncPath = MOUNT_POINT.join(TEST_DIRECTORY).join(TEST_FILE); + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L + )); + + // Sync again, expect no change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + + // update the metadata for the path + mS3Client.putObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE, TEST_CONTENT_MODIFIED); + + // Sync should see the change + result = mFileSystemMaster.getMetadataSyncer().syncPath( + syncPath, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.RECREATE, 1L + )); + long mountPointInodeId = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()).getFileId(); + assertFalse(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + + // Delete the file + mS3Client.deleteObject(TEST_BUCKET, TEST_DIRECTORY + "/" + TEST_FILE); + // Sync the root, all should be removed + result = mFileSystemMaster.getMetadataSyncer().syncPath( + MOUNT_POINT, mDescendantType, mDirectoryLoadType, 0).getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); 
+ assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.DELETE, mDescendantType == DescendantType.NONE ? 0L : 2L + )); + assertTrue(mFileSystemMaster.getInodeStore() + .get(mountPointInodeId).get().asDirectory().isDirectChildrenLoaded()); + boolean exists = mFileSystemMaster.exists(syncPath, existsNoSync()); + if (mDescendantType == DescendantType.NONE) { + // since we only synced the root path, the nested file should not be deleted + assertTrue(exists); + } else { + assertFalse(exists); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java new file mode 100644 index 000000000000..30d7df0fd33c --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncMultiMountV2Test.java @@ -0,0 +1,199 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.MountContext; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskGroup; +import alluxio.wire.FileInfo; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +@RunWith(Parameterized.class) +public class MetadataSyncMultiMountV2Test extends MetadataSyncV2TestBase { + public MetadataSyncMultiMountV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + @Test + public void syncNonS3DirectoryShadowingMountPoint() + throws Throwable { + /* + / (root) -> local file system (disk) + /s3_mount -> s3 bucket + create /s3_mount in the local file system that shadows the mount point and then do + a metadata sync on root + the sync of the local file system /s3_mount is expected to be skipped + */ + + String localUfsPath + = mFileSystemMaster.getMountTable().resolve(MOUNT_POINT).getUri().getPath(); + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + assertTrue(new File(localUfsPath).createNewFile()); + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + 
assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.SKIPPED_ON_MOUNT_POINT, 1L + )); + FileInfo mountPointFileInfo = mFileSystemMaster.getFileInfo(MOUNT_POINT, getNoSync()); + assertTrue(mountPointFileInfo.isMountPoint()); + assertTrue(mountPointFileInfo.isFolder()); + } + + @Test + public void syncNestedS3Mount() + throws Throwable { + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mFileSystemMaster.mount(NESTED_S3_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "d/f1", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "f2", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "d/f2", TEST_CONTENT); + + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /f1 -> created + /d -> pseudo directory (created) + /f1 -> (created) + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /f2 -> created + /d -> pseudo directory (created) + /f2 -> (created) + */ + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.CREATE, 6L + )); + assertTrue(result.allSucceeded()); + + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), listNoSync(true)); + assertEquals(8, inodes.size()); + assertTrue(mFileSystemMaster.exists(NESTED_S3_MOUNT_POINT.join("d/f2"), existsNoSync())); + assertTrue(mFileSystemMaster.exists(MOUNT_POINT.join("d/f1"), existsNoSync())); + } + + @Test + public void syncNestedS3MountShadowingMountPoint() + throws Throwable { + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /foo -> created + /nested_s3_mount -> 
SHADOWED (mount point s3://test-bucket) + /shadowed -> SHADOWED + /bar/baz -> SHADOWED + /not_shadowed -> created + */ + + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mFileSystemMaster.mount(NESTED_S3_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "nested_s3_mount/shadowed", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "nested_s3_mount/bar/baz", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "not_shadowed", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "foo", TEST_CONTENT); + + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + result.getTasks() + .forEach(it -> System.out.println(it.getTaskInfo().getStats().toReportString())); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.SKIPPED_ON_MOUNT_POINT, mDirectoryLoadType + == DirectoryLoadType.SINGLE_LISTING ? 
2L : 1L + )); + assertTrue(result.allSucceeded()); + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), listNoSync(true)); + assertEquals(4, inodes.size()); + } + + @Test + public void syncS3NestedMountLocalFs() + throws Throwable { + // mount /s3_mount -> s3://test-bucket + mFileSystemMaster.mount(MOUNT_POINT, UFS_ROOT, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET, "foo/bar", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET, "foo/baz", TEST_CONTENT); + + mFileSystemMaster.createDirectory(new AlluxioURI("/mnt"), + CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH)); + // mount /mnt/nested_s3_mount -> s3://test-bucket-2 + mFileSystemMaster.mount(NESTED_MOUNT_POINT, UFS_ROOT2, MountContext.defaults()); + mS3Client.putObject(TEST_BUCKET2, "foo/bar", TEST_CONTENT); + mS3Client.putObject(TEST_BUCKET2, "foo/baz", TEST_CONTENT); + + TaskGroup result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ONE, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + assertEquals(1, result.getTaskCount()); + + /* + / (ROOT) -> unchanged (root mount point local fs) + /s3_mount -> unchanged (mount point s3://test-bucket) + /foo -> pseudo directory (created) + /bar -> (created) + /baz -> (created) + /mnt -> unchanged + /nested_s3_mount -> unchanged (mount point s3://test-bucket-2) + /foo -> pseudo directory (created) + /bar -> (created) + /baz -> (created) + */ + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0); + result.waitAllComplete(TIMEOUT_MS); + assertTrue(result.allSucceeded()); + assertSyncOperations(result, ImmutableMap.of( + SyncOperation.NOOP, 1L, + SyncOperation.CREATE, 6L + )); + assertEquals(3, result.getTaskCount()); + + List inodes = mFileSystemMaster.listStatus(new AlluxioURI("/"), 
listNoSync(true)); + assertEquals(9, inodes.size()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java new file mode 100644 index 000000000000..0c8ced898029 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncNonObjectStoreV2Test.java @@ -0,0 +1,163 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file; + +import static alluxio.master.file.MetadataSyncV2TestBase.TIMEOUT_MS; +import static alluxio.master.file.MetadataSyncV2TestBase.assertSyncOperations; +import static alluxio.master.file.MetadataSyncV2TestBase.existsNoSync; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.client.WriteType; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.CreateDirectoryPOptions; +import alluxio.grpc.DeletePOptions; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.DeleteContext; +import alluxio.master.file.mdsync.BaseTask; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.security.authorization.Mode; + +import com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.Arrays; +import java.util.Collection; + 
+@RunWith(Parameterized.class) +public class MetadataSyncNonObjectStoreV2Test extends FileSystemMasterTestBase { + + DirectoryLoadType mDirectoryLoadType; + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][] { + {DirectoryLoadType.SINGLE_LISTING}, + {DirectoryLoadType.BFS}, + {DirectoryLoadType.DFS}, + }); + } + + public MetadataSyncNonObjectStoreV2Test(DirectoryLoadType directoryLoadType) { + mDirectoryLoadType = directoryLoadType; + } + + @Test + public void syncEmptyDirectory() + throws Throwable { + String path = mFileSystemMaster.getMountTable().resolve(new AlluxioURI("/")).getUri().getPath(); + assertTrue(new File(path + "/test_directory").mkdir()); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.NOOP, 1L + )); + } + + @Test + public void syncNonS3DirectorySync() + throws Throwable { + String path = mFileSystemMaster.getMountTable().resolve(new AlluxioURI("/")).getUri().getPath(); + assertTrue(new File(path + "/test_file").createNewFile()); + assertTrue(new File(path + "/test_directory").mkdir()); + assertTrue(new File(path + "/test_directory/test_file").createNewFile()); + assertTrue(new File(path + "/test_directory/nested_directory").mkdir()); + assertTrue(new File(path + "/test_directory/nested_directory/test_file").createNewFile()); + + BaseTask result = 
mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.NONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_file"), DescendantType.NONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_file"), existsNoSync())); + + // TODO(yimin) when the descendant type is ONE/ALL, seems like the NOOP of the root inode + // itself is not counted. + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ONE, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 2L, + SyncOperation.NOOP, 1L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + + result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/test_directory"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + SyncOperation.CREATE, 1L, + SyncOperation.NOOP, 3L + )); + assertTrue(mFileSystemMaster.exists(new AlluxioURI("/test_directory"), existsNoSync())); + } + + @Test + public void testNonS3Fingerprint() throws Throwable { + // this essentially creates a directory and changes the mode of its alluxio directory + // without syncing the change down to ufs + 
mFileSystemMaster.createDirectory(new AlluxioURI("/d"), + CreateDirectoryContext.defaults().setWriteType(WriteType.THROUGH)); + mFileSystemMaster.delete(new AlluxioURI("/d"), + DeleteContext.mergeFrom(DeletePOptions.newBuilder().setAlluxioOnly(true))); + mFileSystemMaster.createDirectory(new AlluxioURI("/d"), + CreateDirectoryContext.mergeFrom( + CreateDirectoryPOptions.newBuilder().setMode(new Mode((short) 0777).toProto())) + .setWriteType(WriteType.MUST_CACHE)); + + BaseTask result = mFileSystemMaster.getMetadataSyncer().syncPath( + new AlluxioURI("/"), DescendantType.ALL, mDirectoryLoadType, 0) + .getBaseTask(); + result.waitComplete(TIMEOUT_MS); + assertTrue(result.succeeded()); + + assertSyncOperations(result.getTaskInfo(), ImmutableMap.of( + // d + SyncOperation.UPDATE, 1L + )); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java new file mode 100644 index 000000000000..a5053d7718e5 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/MetadataSyncV2TestBase.java @@ -0,0 +1,308 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.file.options.DirectoryLoadType; +import alluxio.grpc.ExistsPOptions; +import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadMetadataPType; +import alluxio.master.file.contexts.ExistsContext; +import alluxio.master.file.contexts.GetStatusContext; +import alluxio.master.file.contexts.ListStatusContext; +import alluxio.master.file.mdsync.SyncFailReason; +import alluxio.master.file.mdsync.SyncOperation; +import alluxio.master.file.mdsync.TaskGroup; +import alluxio.master.file.mdsync.TaskInfo; +import alluxio.master.file.mdsync.TaskStats; +import alluxio.util.io.PathUtils; +import alluxio.wire.FileInfo; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import org.gaul.s3proxy.S3Proxy; +import org.gaul.s3proxy.junit.S3ProxyJunitCore; +import org.gaul.s3proxy.junit.S3ProxyRule; +import org.junit.Rule; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.S3Object; +import 
software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; +import java.util.Stack; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class MetadataSyncV2TestBase extends FileSystemMasterTestBase { + static final Logger LOG = LoggerFactory.getLogger(FileSystemMetadataSyncV2Test.class); + static final String TEST_BUCKET = "alluxio-mdsync-test-bucket"; + static final String TEST_BUCKET2 = "alluxio-mdsync-test-bucket-2"; + static final String TEST_FILE = "test_file"; + static final String TEST_DIRECTORY = "test_directory"; + static final String TEST_CONTENT = "test_content"; + static final String TEST_CONTENT_MODIFIED = "test_content_modified"; + static final AlluxioURI UFS_ROOT = new AlluxioURI("s3://" + TEST_BUCKET + "/"); + static final AlluxioURI UFS_ROOT2 = new AlluxioURI("s3://" + TEST_BUCKET2 + "/"); + static final AlluxioURI MOUNT_POINT = new AlluxioURI("/s3_mount"); + static final AlluxioURI MOUNT_POINT2 = new AlluxioURI("/s3_mount2"); + static final AlluxioURI NESTED_MOUNT_POINT = new AlluxioURI("/mnt/nested_s3_mount"); + static final AlluxioURI NESTED_S3_MOUNT_POINT = + new AlluxioURI("/s3_mount/nested_s3_mount"); + static final long TIMEOUT_MS = 30_000; + + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + .withBlobStoreProvider("transient") + .withCredentials("_", "_") + .build(); + + boolean mUseRealS3 = false; + AmazonS3 mS3Client; + S3Client mClient; + DirectoryLoadType mDirectoryLoadType; + + @Override + public void before() throws Exception { + Configuration.set(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_ENABLED, false); + Configuration.set(PropertyKey.UNDERFS_LISTING_LENGTH, 2); + + if (mUseRealS3) { + Configuration.set(PropertyKey.UNDERFS_S3_REGION, "us-west-1"); + mClient = S3Client.builder().region(Region.US_WEST_1).build(); + 
mS3Client = AmazonS3ClientBuilder.standard() + .withRegion(Region.US_WEST_1.toString()).build(); + } else { + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT, + mS3Proxy.getUri().getHost() + ":" + mS3Proxy.getUri().getPort()); + Configuration.set(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2"); + Configuration.set(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true); + Configuration.set(PropertyKey.S3A_ACCESS_KEY, mS3Proxy.getAccessKey()); + Configuration.set(PropertyKey.S3A_SECRET_KEY, mS3Proxy.getSecretKey()); + mClient = S3Client.builder().credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .endpointOverride(mS3Proxy.getUri()).region(Region.US_WEST_2) + .build(); + + mS3Client = AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + Regions.US_WEST_2.getName())) + .build(); + } + mS3Client.createBucket(TEST_BUCKET); + mS3Client.createBucket(TEST_BUCKET2); + super.before(); + } + + @Override + public void after() throws Exception { + mS3Client.shutdown(); + mClient.close(); + try { + stopS3Server(); + } catch (Exception e) { + LOG.error("Closing s3 mock server failed", e); + } + super.after(); + } + + ListStatusContext listSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.ALWAYS) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(0).build() + )); + } + + ListStatusContext listNoSync(boolean isRecursive) { + return ListStatusContext.mergeFrom(ListStatusPOptions.newBuilder() + .setRecursive(isRecursive) + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + 
FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + GetStatusContext getNoSync() { + return GetStatusContext.mergeFrom(GetStatusPOptions.newBuilder() + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + static ExistsContext existsNoSync() { + return ExistsContext.mergeFrom(ExistsPOptions.newBuilder() + .setLoadMetadataType(LoadMetadataPType.NEVER) + .setCommonOptions( + FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1).build() + )); + } + + void stopS3Server() { + try { + Field coreField = S3ProxyRule.class.getDeclaredField("core"); + coreField.setAccessible(true); + S3ProxyJunitCore core = (S3ProxyJunitCore) coreField.get(mS3Proxy); + Field s3ProxyField = S3ProxyJunitCore.class.getDeclaredField("s3Proxy"); + s3ProxyField.setAccessible(true); + S3Proxy proxy = (S3Proxy) s3ProxyField.get(core); + proxy.stop(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + void startS3Server() { + try { + Field coreField = S3ProxyRule.class.getDeclaredField("core"); + coreField.setAccessible(true); + S3ProxyJunitCore core = (S3ProxyJunitCore) coreField.get(mS3Proxy); + Field s3ProxyField = S3ProxyJunitCore.class.getDeclaredField("s3Proxy"); + s3ProxyField.setAccessible(true); + S3Proxy proxy = (S3Proxy) s3ProxyField.get(core); + proxy.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static void checkUfsMatches( + AlluxioURI alluxioPath, String s3Bucket, + String mountPrefix, + DefaultFileSystemMaster master, S3Client s3client) throws Exception { + + Stack> toCheck = new Stack<>(); + toCheck.push(new Pair<>(alluxioPath.getPath(), mountPrefix)); + while (!toCheck.isEmpty()) { + Pair nxt = toCheck.pop(); + + Iterator alluxioItems = master.listStatus(new AlluxioURI(nxt.getFirst()), + ListStatusContext.defaults().disableMetadataSync()).stream().iterator(); + Iterator> 
ufsItems = listUfsPath(s3Bucket, nxt.getSecond(), s3client, + mountPrefix, alluxioPath.getPath()); + while (alluxioItems.hasNext()) { + FileInfo nxtAlluxio = alluxioItems.next(); + if (!ufsItems.hasNext()) { + throw new IllegalStateException( + String.format("Ufs did not find alluxio item %s", nxtAlluxio)); + } + Pair nxtUfs = ufsItems.next(); + String nxtInode = nxtAlluxio.getPath(); + if (nxtAlluxio.isFolder()) { + toCheck.push(new Pair<>(nxtAlluxio.getPath(), nxtUfs.getSecond())); + nxtInode = PathUtils.normalizePath(nxtInode, AlluxioURI.SEPARATOR); + } + // System.out.printf("Checking %s, %s%n", nxtInode, nxtUfs.getFirst()); + assertEquals(nxtInode, nxtUfs.getFirst()); + } + if (ufsItems.hasNext()) { + throw new IllegalStateException( + String.format("alluxio did not find ufs item %s", ufsItems.next())); + } + } + } + + static Iterator> listUfsPath( + String s3Bucket, String s3Path, S3Client client, + String mountPrefix, String alluxioPrefix) { + String normalizedPrefix = PathUtils.normalizePath(alluxioPrefix, AlluxioURI.SEPARATOR); + if (!s3Path.isEmpty()) { + s3Path = PathUtils.normalizePath(s3Path, AlluxioURI.SEPARATOR); + } + if (!mountPrefix.isEmpty()) { + mountPrefix = PathUtils.normalizePath(mountPrefix, AlluxioURI.SEPARATOR); + } + ListObjectsV2Iterable result = client.listObjectsV2Paginator(ListObjectsV2Request.builder() + .bucket(s3Bucket).delimiter(AlluxioURI.SEPARATOR).prefix(s3Path).build()); + String finalMountPrefix = mountPrefix; + String finalS3Path = s3Path; + return result.stream().flatMap(resp -> + Stream.concat(resp.commonPrefixes().stream().map(CommonPrefix::prefix), + resp.contents().stream().map(S3Object::key))) + .filter(nxt -> { + assertTrue(nxt.startsWith(finalS3Path)); + return nxt.length() > finalS3Path.length(); + }).sorted().distinct() + .map(nxt -> new Pair<>( + normalizedPrefix + nxt.substring(finalMountPrefix.length()), nxt)).iterator(); + } + + static void assertSyncOperations(TaskInfo taskInfo, Map operations) { + 
assertSyncOperations(taskInfo.getStats().getSuccessOperationCount(), operations); + } + + static void assertSyncOperations(TaskGroup taskGroup, Map operations) { + AtomicLong[] stats = new AtomicLong[SyncOperation.values().length]; + for (int i = 0; i < stats.length; ++i) { + stats[i] = new AtomicLong(); + } + taskGroup.getTasks().forEach( + it -> { + AtomicLong[] taskStats = it.getTaskInfo().getStats().getSuccessOperationCount(); + for (int i = 0; i < taskStats.length; ++i) { + stats[i].addAndGet(taskStats[i].get()); + } + } + ); + assertSyncOperations(stats, operations); + } + + private static void assertSyncOperations( + AtomicLong[] stats, Map operations) { + for (SyncOperation operation : SyncOperation.values()) { + assertEquals( + "Operation " + operation.toString() + " count not equal. " + + "Actual operation count: " + + Arrays.toString(stats), + (long) operations.getOrDefault(operation, 0L), + stats[operation.getValue()].get() + ); + } + } + + static void assertSyncFailureReason(TaskInfo taskInfo, SyncFailReason failReason) { + Map failReasons = taskInfo.getStats().getSyncFailReasons(); + assertEquals(1, failReasons.size()); + assertTrue(failReasons.entrySet().stream().map(it -> it.getValue().getSyncFailReason()).collect( + Collectors.toList()).contains(failReason)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java new file mode 100644 index 000000000000..617b200c8088 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/BaseTaskTest.java @@ -0,0 +1,128 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static alluxio.file.options.DescendantType.NONE; +import static alluxio.file.options.DescendantType.ONE; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import alluxio.AlluxioURI; +import alluxio.file.options.DirectoryLoadType; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.function.Function; + +public class BaseTaskTest { + + private MetadataSyncHandler mMetadataSyncHandler; + + private final Clock mClock = Clock.systemUTC(); + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function> mClientSupplier = + (uri) -> new CloseableResource(mUfsClient) { + @Override + public void closeResource() {} + }; + + @Before + public void before() { + mMetadataSyncHandler = new MetadataSyncHandler(Mockito.mock(TaskTracker.class), null, null); + } + + @Test + public void PathIsCoveredNone() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), 
NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + } + + @Test + public void PathIsCoveredOne() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), NONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ONE)); + + assertFalse(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), 
ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ALL)); + } + + @Test + public void PathIsCoveredAll() { + BaseTask path = BaseTask.create(new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, DirectoryLoadType.SINGLE_LISTING, 0), mClock.millis(), mClientSupplier); + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), NONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), NONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), NONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ONE)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), ONE)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ONE)); + + assertTrue(path.pathIsCovered(new AlluxioURI("/path"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/p"), ALL)); + assertFalse(path.pathIsCovered(new AlluxioURI("/path2"), ALL)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested"), ALL)); + assertTrue(path.pathIsCovered(new AlluxioURI("/path/nested/nested"), ALL)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java new file mode 100644 index 000000000000..8e4cd834be5e --- /dev/null +++ 
b/core/server/master/src/test/java/alluxio/master/file/mdsync/BatchPathWaiterTest.java @@ -0,0 +1,334 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static alluxio.file.options.DescendantType.NONE; +import static alluxio.file.options.DescendantType.ONE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Function; + +public class BatchPathWaiterTest { + + ExecutorService mThreadPool; + + private final Clock mClock = 
Clock.systemUTC(); + private MetadataSyncHandler mMetadataSyncHandler; + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function> mClientSupplier = + (uri) -> new CloseableResource(mUfsClient) { + @Override + public void closeResource() {} + }; + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + DefaultFileSystemMaster defaultFileSystemMaster = Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = Mockito.spy(new MetadataSyncHandler(Mockito.mock(TaskTracker.class), + defaultFileSystemMaster, null)); + } + + @After + public void after() { + mThreadPool.shutdown(); + } + + @Test + public void TestWaiter() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path"))); + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + // Complete the sync + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), null, + false, false)); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, true); + path.nextCompleted(result); + // Even though we completed the path being waited for, we only release the waiter for + // paths greater than the completed path + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + // now on 
completion of the task the waiter can be released + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + assertTrue(waiter.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestMultiWaiter() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + // after completing /path/1 no waiters will be released + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path/1")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // after completing /path/2, the waiter for /path/1 will be released + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/2")), false, false); + path.nextCompleted(result); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // now on completion of the task all waiters can be released + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestWaiterOutOfOrder() throws Exception { + long nxtLoadID = 0; + TaskInfo ti = new 
TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/3"), + new AlluxioURI("/path/4")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/2"), + new AlluxioURI("/path/3")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path/1")), true, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/2")), false, false); + path.nextCompleted(result); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + } + + @Test + public void 
TestBaseTackSinglePath() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + NONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + assertFalse(path.isCompleted().isPresent()); + SyncProcessResult result = new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, false); + path.nextCompleted(result); + path.getPathLoadTask().onProcessComplete(nxtLoadID, result); + assertTrue(path.isCompleted().isPresent()); + } + + @Test + public void TestBaseTaskInOrder() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + ALL, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BatchPathWaiter root = (BatchPathWaiter) BaseTask.create( + ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + root.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + assertFalse(root.isCompleted().isPresent()); + + // complete , should have |<,/ad>| + PathSequence completed = new PathSequence(new AlluxioURI("/"), + new AlluxioURI("/ad")); + List completedList = Lists.newArrayList( + new PathSequence(new AlluxioURI(""), new AlluxioURI("/ad"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/bf>| + completed = new PathSequence(new AlluxioURI("/ad"), new AlluxioURI("/bf")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/bf"))); + 
root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/bf/eg| + completed = new PathSequence(new AlluxioURI("/bf"), new AlluxioURI("/bf/eg")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), + new AlluxioURI("/bf/eg"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/tr| + completed = new PathSequence(new AlluxioURI("/bf/eg"), new AlluxioURI("/tr")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/tr"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, + false)); + assertEquals(completedList, root.getLastCompleted()); + + // finish with + completed = new PathSequence(new AlluxioURI("/tr"), new AlluxioURI("/trd")); + SyncProcessResult finalResult = new SyncProcessResult(ti, ti.getBasePath(), completed, + false, false); + root.nextCompleted(finalResult); + root.getPathLoadTask().onProcessComplete(nxtLoadID, finalResult); + assertTrue(root.isCompleted().isPresent()); + } + + @Test + public void TestBaseTaskOutOfOrder() { + long nxtLoadID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + ONE, 0, DirectoryLoadType.SINGLE_LISTING, 0); + BatchPathWaiter root = (BatchPathWaiter) BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + root.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + assertFalse(root.isCompleted().isPresent()); + + // complete , should have |<,a>| + PathSequence completed = new PathSequence(new AlluxioURI("/"), new AlluxioURI("/a")); + List completedList = Lists.newArrayList( + new 
PathSequence(new AlluxioURI(""), new AlluxioURI("/a"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,b>| + completed = new PathSequence(new AlluxioURI("/a"), new AlluxioURI("/b")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/b"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<, /b>, | + completed = new PathSequence(new AlluxioURI("/c"), new AlluxioURI("/d")); + completedList.add(completed); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/d>| + completed = new PathSequence(new AlluxioURI("/b"), new AlluxioURI("/c")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/d"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/d>, | + completed = new PathSequence(new AlluxioURI("/g"), new AlluxioURI("/h")); + completedList.add(completed); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/e>, | + completed = new PathSequence(new AlluxioURI("/d"), new AlluxioURI("/e")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/e")), + new PathSequence(new AlluxioURI("/g"), new AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/e>, | + 
completed = new PathSequence(new AlluxioURI("/f"), new AlluxioURI("/g")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/e")), + new PathSequence(new AlluxioURI("/f"), new AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // complete , should have |<,/h>| + completed = new PathSequence(new AlluxioURI("/e"), new AlluxioURI("/f")); + completedList = Lists.newArrayList(new PathSequence(new AlluxioURI(""), new AlluxioURI("/h"))); + root.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), completed, true, false + )); + assertEquals(completedList, root.getLastCompleted()); + + // finish with + completed = new PathSequence(new AlluxioURI("/h"), new AlluxioURI("/j")); + SyncProcessResult finalResult = new SyncProcessResult(ti, ti.getBasePath(), completed, + false, false); + root.nextCompleted(finalResult); + root.getPathLoadTask().onProcessComplete(nxtLoadID, finalResult); + assertTrue(root.isCompleted().isPresent()); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java new file mode 100644 index 000000000000..30b6892e8402 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/DirectoryPathWaiterTest.java @@ -0,0 +1,196 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import static alluxio.file.options.DescendantType.ALL; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.function.Function; + +@RunWith(Parameterized.class) +public class DirectoryPathWaiterTest { + + @Parameterized.Parameters + public static Collection directoryLoadTypes() { + return Arrays.asList(DirectoryLoadType.DFS, DirectoryLoadType.BFS); + } + + public DirectoryPathWaiterTest(DirectoryLoadType loadType) { + mDirLoadType = loadType; + } + + private final MockUfsClient mUfsClient = new MockUfsClient(); + + private final Function> mClientSupplier = + (uri) -> new CloseableResource(mUfsClient) { + @Override + public void closeResource() {} + }; + + DirectoryLoadType mDirLoadType; + ExecutorService mThreadPool; + Clock mClock = Clock.systemUTC(); + MetadataSyncHandler mMetadataSyncHandler; + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + DefaultFileSystemMaster defaultFileSystemMaster = 
Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = Mockito.spy(new MetadataSyncHandler(Mockito.mock(TaskTracker.class), + defaultFileSystemMaster, null)); + } + + @After + public void after() { + mThreadPool.shutdown(); + } + + @Test + public void TestWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path"))); + assertThrows(TimeoutException.class, () -> waiter.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, ti.getBasePath(), + new PathSequence(new AlluxioURI("/path"), + new AlluxioURI("/path")), false, true)); + assertTrue(waiter.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestMultiWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/1"), + new 
PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + // if the path is truncated, it should not release the waiter on the path + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/2"), + new PathSequence(new AlluxioURI("/path/2"), + new AlluxioURI("/path/2")), true, false)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/2"), + new PathSequence(new AlluxioURI("/path/2"), + new AlluxioURI("/path/2")), false, false)); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestNestedWaiter() throws Exception { + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/path"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter1 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/1"))); + Future waiter2 = mThreadPool.submit(() -> path.waitForSync(new AlluxioURI("/path/2"))); + // a different nested path should not release the waiters + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path/other"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + // the parent path should release both the children + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/path"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + 
assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + } + + @Test + public void TestParentWaiter() throws Exception { + long loadRequestID = 0; + TaskInfo ti = new TaskInfo(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/path"), null, + ALL, 0, mDirLoadType, 0); + BaseTask path = BaseTask.create(ti, mClock.millis(), mClientSupplier); + Mockito.doAnswer(ans -> { + path.onComplete(ans.getArgument(1), mMetadataSyncHandler.mFsMaster, null); + return null; + }).when(mMetadataSyncHandler).onPathLoadComplete(anyLong(), anyBoolean()); + + Future waiter1 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path/nested/1"))); + Future waiter2 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path/nested"))); + Future waiter3 = mThreadPool.submit(() -> + path.waitForSync(new AlluxioURI("/path"))); + // finishing the root should only release the direct children + path.nextCompleted(new SyncProcessResult(ti, new AlluxioURI("/"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false)); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertThrows(TimeoutException.class, () -> waiter2.get(1, TimeUnit.SECONDS)); + assertTrue(waiter3.get(1, TimeUnit.SECONDS)); + // finishing /path should release the direct children of /path + SyncProcessResult finalResult = new SyncProcessResult(ti, new AlluxioURI("/path"), + new PathSequence(new AlluxioURI("/path/1"), + new AlluxioURI("/path/1")), false, false); + path.nextCompleted(finalResult); + assertThrows(TimeoutException.class, () -> waiter1.get(1, TimeUnit.SECONDS)); + assertTrue(waiter2.get(1, TimeUnit.SECONDS)); + // finishing the whole task should release the remaining waiters + path.getPathLoadTask().onProcessComplete(loadRequestID, finalResult); + assertTrue(waiter1.get(1, TimeUnit.SECONDS)); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java 
b/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java new file mode 100644 index 000000000000..a268dce6f015 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/DummySyncProcess.java @@ -0,0 +1,63 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.exception.InvalidPathException; +import alluxio.exception.runtime.InvalidArgumentRuntimeException; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.underfs.UfsStatus; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class DummySyncProcess implements SyncProcess { + + @Override + public SyncProcessResult performSync( + LoadResult loadResult, UfsSyncPathCache syncPathCache) throws Throwable { + + Stream stream = loadResult.getUfsLoadResult().getItems().peek(status -> { + // If we are loading by directory, then we must create a new load task on each + // directory traversed + if (loadResult.getTaskInfo().hasDirLoadTasks() && status.isDirectory()) { + try { + AlluxioURI fullPath = loadResult.getBaseLoadPath().join(status.getName()); + // first check if the directory needs to be synced + if (syncPathCache.shouldSyncPath( + fullPath, // no reverse resolve in test + loadResult.getTaskInfo().getSyncInterval(), + loadResult.getTaskInfo().getDescendantType()).isShouldSync()) { + loadResult.getTaskInfo().getMdSync() + 
.loadNestedDirectory(loadResult.getTaskInfo().getId(), fullPath); + } + } catch (InvalidPathException e) { + throw new InvalidArgumentRuntimeException(e); + } + } + }); + List items = stream.collect(Collectors.toList()); + if (items.size() == 0) { + return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(), + null, false, false); + } + boolean rootPathIsFile = items.size() == 1 && loadResult.getBaseLoadPath().equals( + loadResult.getTaskInfo().getBasePath()) && !items.get(0).isDirectory(); + return new SyncProcessResult(loadResult.getTaskInfo(), loadResult.getBaseLoadPath(), + new PathSequence(new AlluxioURI(items.get(0).getName()), + new AlluxioURI(items.get(items.size() - 1).getName())), + loadResult.getUfsLoadResult().isTruncated(), rootPathIsFile + ); + } +} + diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java new file mode 100644 index 000000000000..d10612b9b879 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/MockUfsClient.java @@ -0,0 +1,108 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.file.options.DescendantType; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsLoadResult; +import alluxio.underfs.UfsStatus; +import alluxio.util.RateLimiter; + +import java.util.Iterator; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; + +public class MockUfsClient implements UfsClient { + + Throwable mError = null; + Iterator> mItems = null; + Function, Boolean>> mResultFunc = null; + UfsStatus mUfsStatus = null; + RateLimiter mRateLimiter = null; + Function mGetStatusFunc = null; + + void setError(@Nullable Throwable t) { + mError = t; + } + + void setRateLimiter(RateLimiter rateLimiter) { + mRateLimiter = rateLimiter; + } + + void setResult(Iterator> items) { + mItems = items; + } + + void setGetStatusItem(UfsStatus item) { + mUfsStatus = item; + } + + void setListingResultFunc(Function, Boolean>> resultFunc) { + mResultFunc = resultFunc; + } + + public void performGetStatusAsync( + String path, Consumer onComplete, Consumer onError) { + UfsStatus status = mUfsStatus; + if (mGetStatusFunc != null) { + status = mGetStatusFunc.apply(path); + } + onComplete.accept(new UfsLoadResult( + status == null ? Stream.empty() : Stream.of(status), + status == null ? 
0 : 1, + null, null, false, + status != null && status.isFile(), true)); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, + Consumer onComplete, Consumer onError) { + if (mError != null) { + onError.accept(mError); + } else if (mResultFunc != null) { + try { + Pair, Boolean> result = mResultFunc.apply(path); + List items = result.getFirst().collect(Collectors.toList()); + AlluxioURI lastItem = new AlluxioURI(items.get(items.size() - 1).getName()); + onComplete.accept(new UfsLoadResult(items.stream(), items.size(), + continuationToken, lastItem, result.getSecond(), + items.size() > 0 && items.get(0).isFile(), true)); + } catch (Throwable t) { + onError.accept(t); + } + } else { + if (mItems.hasNext()) { + List items = mItems.next().collect(Collectors.toList()); + AlluxioURI lastItem = new AlluxioURI(items.get(items.size() - 1).getName()); + onComplete.accept(new UfsLoadResult(items.stream(), items.size(), + continuationToken, lastItem, mItems.hasNext(), + items.size() > 0 && items.get(0).isFile(), true)); + } + } + } + + @Override + public RateLimiter getRateLimiter() { + if (mRateLimiter == null) { + return RateLimiter.createRateLimiter(0); + } + return mRateLimiter; + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java new file mode 100644 index 000000000000..644e7fcdf92c --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/TaskTrackerTest.java @@ -0,0 +1,673 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.file.mdsync; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; + +import alluxio.AlluxioURI; +import alluxio.collections.Pair; +import alluxio.exception.status.UnavailableException; +import alluxio.file.options.DescendantType; +import alluxio.file.options.DirectoryLoadType; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.SyncCheck; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.journal.NoopJournalContext; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsStatus; +import alluxio.util.CommonUtils; +import alluxio.util.SimpleRateLimiter; +import alluxio.util.WaitForOptions; + +import com.google.common.base.Ticker; +import com.google.common.collect.ImmutableList; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.time.Duration; +import java.util.Collections; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.Semaphore; 
+import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; + +public class TaskTrackerTest { + + ExecutorService mThreadPool; + TaskTracker mTaskTracker; + MetadataSyncHandler mMetadataSyncHandler; + MockUfsClient mUfsClient; + UfsSyncPathCache mUfsSyncPathCache; + UfsAbsentPathCache mAbsentCache; + SyncProcess mSyncProcess; + UfsStatus mFileStatus = new UfsFileStatus("file", "", + 0L, 0L, "", "", (short) 0, 0L); + UfsStatus mDirStatus = new UfsDirectoryStatus("dir", "", "", (short) 0); + static final long WAIT_TIMEOUT = 5_000; + + private CloseableResource getClient(AlluxioURI ignored) { + return new CloseableResource(mUfsClient) { + @Override + public void closeResource() { + } + }; + } + + @Before + public void before() throws UnavailableException { + mThreadPool = Executors.newCachedThreadPool(); + mUfsClient = Mockito.spy(new MockUfsClient()); + mSyncProcess = Mockito.spy(new DummySyncProcess()); + mUfsSyncPathCache = Mockito.mock(UfsSyncPathCache.class); + mAbsentCache = Mockito.mock(UfsAbsentPathCache.class); + mTaskTracker = new TaskTracker( + 1, 1, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + DefaultFileSystemMaster defaultFileSystemMaster = Mockito.mock(DefaultFileSystemMaster.class); + Mockito.when(defaultFileSystemMaster.createJournalContext()) + .thenReturn(NoopJournalContext.INSTANCE); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, defaultFileSystemMaster, null); + } + + @After + public void after() throws Throwable { + assertFalse(mTaskTracker.hasRunningTasks()); + mTaskTracker.close(); + mThreadPool.shutdown(); + } + + void checkStats( + TaskStats stats, int batches, int statuses, int loadErrors, + int loadRequests, boolean loadFailed, boolean processFailed, + boolean firstLoadWasFile) { + if (batches >= 0) { + assertEquals(batches, 
stats.getBatchCount()); + } + if (statuses >= 0) { + assertEquals(statuses, stats.getStatusCount()); + } + if (loadErrors >= 0) { + assertEquals(loadErrors, stats.getLoadErrors()); + } + if (loadRequests >= 0) { + assertEquals(loadRequests, stats.getLoadRequestCount()); + } + assertEquals(loadFailed, stats.isLoadFailed()); + assertEquals(processFailed, stats.isProcessFailed()); + assertEquals(firstLoadWasFile, stats.firstLoadWasFile()); + } + + @Test + public void rateLimitedTest() throws Throwable { + // Be sure ufs loads, and result processing can happen concurrently + int concurrentUfsLoads = 2; + int totalBatches = 10; + int concurrentProcessing = 5; + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + final AtomicLong time = new AtomicLong(0); + long permitsPerSecond = 100000; + long timePerPermit = Duration.ofSeconds(1).toNanos() / permitsPerSecond; + // add a rate limiter + Semaphore rateLimiterBlocker = new Semaphore(0); + SimpleRateLimiter rateLimiter = Mockito.spy( + new SimpleRateLimiter(permitsPerSecond, new Ticker() { + @Override + public long read() { + return time.get(); + } + })); + Mockito.doAnswer(ans -> { + Object result = ans.callRealMethod(); + // after acquiring a permit, we let the main thread know + // by increasing the semaphore + rateLimiterBlocker.release(); + return result; + }).when(rateLimiter).acquire(); + mUfsClient.setRateLimiter(rateLimiter); + mTaskTracker.close(); + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler( + mTaskTracker, mMetadataSyncHandler.mFsMaster, null); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + 
.when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 10; i++) { + remainingLoadCount.set(totalBatches); + + // move the time forward, and take a rate limit permit + // so that any new task will be blocked + time.addAndGet(timePerPermit); + rateLimiter.acquire(); + rateLimiterBlocker.acquire(); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + + for (int j = 0; j < totalBatches; j++) { + int finalJ = j; + CommonUtils.waitForResult("Rate limited listStatus", remainingLoadCount::get, + v -> v == totalBatches - finalJ, + // wait for the next listStatus call to get its rate limiter permit + WaitForOptions.defaults().setTimeoutMs(1000)); + rateLimiterBlocker.acquire(); + // allow the rate limited operation to succeed by moving the time forward + time.addAndGet(timePerPermit); + } + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(remainingLoadCount.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, totalBatches, totalBatches, 0, totalBatches, + false, false, true); + } + } + + @Test + public void concurrentProcessTest() throws Throwable { + // Be sure ufs loads, and result processing can happen concurrently + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler( + mTaskTracker, mMetadataSyncHandler.mFsMaster, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + AtomicInteger processingCount = new AtomicInteger(0); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = 
remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + processingCount.set(0); + CountDownLatch blocker = new CountDownLatch(1); + Mockito.doAnswer(ans -> { + processingCount.incrementAndGet(); + // block the processing to ensure we have concurrent load requests + blocker.await(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + CommonUtils.waitForResult("Concurrent load", remainingLoadCount::get, + v -> v == totalBatches - concurrentUfsLoads - concurrentProcessing, + WaitForOptions.defaults().setTimeoutMs(1000)); + CommonUtils.waitForResult("Concurrent processing", processingCount::get, + v -> v == concurrentProcessing, + WaitForOptions.defaults().setTimeoutMs(1000)); + // let the processing complete + blocker.countDown(); + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(remainingLoadCount.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 100, 100, 0, 100, false, false, true); + } + } + + @Test + public void concurrentDirProcessErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing when using load by directory + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int processError = 95; + int concurrentProcessing = 5; + AtomicInteger remainingProcessCount = new AtomicInteger(processError); + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, 
false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + Mockito.doAnswer(ans -> { + if (remainingProcessCount.decrementAndGet() == 0) { + throw new IOException(); + } + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + for (int i = 0; i < 100; i++) { + for (DirectoryLoadType loadType + : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) { + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + remainingProcessCount.set(processError); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + return new Pair<>(Stream.of(mFileStatus, mDirStatus), truncated); + }); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, loadType)); + Pair result = task.get(); + assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + assertFalse(result.getSecond().succeeded()); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, -1, -1, false, true, true); + } + } + } + + @Test + public void concurrentDirLoadErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + 
mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + if (truncated) { + return new Pair<>(Stream.of(mFileStatus, mDirStatus), true); + } else { + throw new RuntimeException(); + } + }); + + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + for (int i = 0; i < 100; i++) { + for (DirectoryLoadType loadType + : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) { + remainingLoadCount.set(totalBatches); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, loadType)); + Pair result = task.get(); + assertFalse(result.getFirst()); + assertThrows(RuntimeException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, -1, -1, true, false, true); + } + } + } + + @Test + public void concurrentDirLoadTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler( + mTaskTracker, mMetadataSyncHandler.mFsMaster, null); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem > 0; + if (truncated) { + return new Pair<>(Stream.of(mFileStatus, mDirStatus), true); + } else { + return new Pair<>(Stream.of(mFileStatus), false); + } + }); + + for (int i = 0; i < 
100; i++) { + for (DirectoryLoadType loadType + : ImmutableList.of(DirectoryLoadType.DFS, DirectoryLoadType.BFS)) { + remainingLoadCount.set(totalBatches); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, loadType)); + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 0, -1, false, false, true); + } + } + } + + @Test + public void concurrentProcessErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int batchFailureNumber = 50; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + AtomicInteger processingCount = new AtomicInteger(0); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doAnswer(ans -> { + if (processingCount.incrementAndGet() == batchFailureNumber) { + throw new IOException(); + } + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + processingCount.set(0); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + 
DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + Pair result = task.get(); + assertFalse(result.getFirst()); + assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 0, -1, false, true, true); + } + } + + @Test + public void concurrentLoadErrorTest() throws Throwable { + // Fail processing during concurrent ufs loading and processing + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int totalBatches = 100; + int loadFailNumber = 50; + int concurrentProcessing = 5; + mTaskTracker = new TaskTracker( + concurrentProcessing, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + AtomicInteger remainingLoadCount = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = remainingLoadCount.decrementAndGet(); + if (nxtItem <= loadFailNumber) { + throw new RuntimeException(); + } + return new Pair<>(Stream.of(mFileStatus), true); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + remainingLoadCount.set(totalBatches); + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + Pair result = task.get(); + assertFalse(result.getFirst()); + assertThrows(RuntimeException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 4, -1, true, false, true); + } + } + + @Test + public void concurrentLoadTest() throws Throwable { + // be sure loads can happen concurrently + mTaskTracker.close(); + int concurrentUfsLoads = 5; + int 
totalBatches = 100; + mTaskTracker = new TaskTracker( + 1, concurrentUfsLoads, false, false, + mUfsSyncPathCache, mAbsentCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, + mMetadataSyncHandler.mFsMaster, null); + AtomicInteger count = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = count.decrementAndGet(); + boolean truncated = nxtItem != 0; + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + + for (int i = 0; i < 100; i++) { + count.set(totalBatches); + CountDownLatch blocker = new CountDownLatch(1); + Mockito.doAnswer(ans -> { + // block the processing to ensure we have concurrent load requests + blocker.await(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + + Future> task = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.SINGLE_LISTING)); + CommonUtils.waitForResult("Concurrent load", count::get, + v -> v == totalBatches - concurrentUfsLoads - 1, + WaitForOptions.defaults().setTimeoutMs(1000)); + // let the processing complete + blocker.countDown(); + Pair result = task.get(); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + assertEquals(count.get(), 0); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 100, 100, 0, 100, false, false, true); + } + } + + @Test + public void dirLoadTest() throws Throwable { + // Load nested directories one level at a time in different batch requests + mUfsClient.setListingResultFunc(path -> { + if (path.equals("/")) { + return new Pair<>(Stream.of(mFileStatus, mDirStatus), false); + } else if (path.equals("/dir")) { + return new Pair<>(Stream.of(mFileStatus, mFileStatus), false); + } 
else { + throw new RuntimeException("should not reach"); + } + }); + + for (int i = 0; i < 100; i++) { + // Use load type BFS, there should be a load task for both / and /dir + Mockito.doReturn(SyncCheck.shouldSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.BFS); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 2, 4, 0, 2, false, false, true); + + // run the same request, except have the sync for the nested directory not be needed + Mockito.doReturn(SyncCheck.shouldNotSyncWithTime(0)) + .when(mUfsSyncPathCache).shouldSyncPath(any(), anyLong(), any()); + result = mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ALL, 0, DirectoryLoadType.BFS); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 1, 2, 0, 1, false, false, true); + } + } + + @Test + public void basicSyncTest() throws Throwable { + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(Collections.singletonList(Stream.of(mFileStatus)).iterator()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 1, 1, 0, 1, false, false, true); + } + } + + @Test + public void multiBatchTest() throws Throwable { + // load a directory of 2 batches of size 1 + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(ImmutableList.of(Stream.of(mFileStatus), + 
Stream.of(mFileStatus)).iterator()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 2, 2, 0, 2, false, false, true); + } + } + + @Test + public void loadErrorTest() throws Throwable { + // Ufs loads return errors until failure + for (int i = 0; i < 100; i++) { + mUfsClient.setError(new Throwable()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertFalse(result.getFirst()); + assertThrows(Throwable.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, 0, 0, 4, 1, true, false, false); + } + } + + @Test + public void loadErrorRetryTest() throws Throwable { + int totalBatches = 100; + // Error on the first load, but let the next succeed + AtomicInteger count = new AtomicInteger(totalBatches); + mUfsClient.setListingResultFunc(path -> { + int nxtItem = count.decrementAndGet(); + boolean truncated = nxtItem != 0; + if (truncated && nxtItem % 2 == 0) { + throw new RuntimeException(); + } + return new Pair<>(Stream.of(mFileStatus), truncated); + }); + for (int i = 0; i < 100; i++) { + count.set(totalBatches); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, DescendantType.ONE, 0, + DirectoryLoadType.SINGLE_LISTING); + assertTrue(result.getFirst()); + result.getSecond().waitComplete(WAIT_TIMEOUT); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + int amount = totalBatches / 2; + checkStats(stats, amount + 1, amount + 1, amount - 1, amount + 1, false, false, true); + } + } + + @Test + public 
void processErrorTest() throws Throwable { + // An error happens during processing + for (int i = 0; i < 100; i++) { + mUfsClient.setResult(ImmutableList.of(Stream.of(mFileStatus), + Stream.of(mFileStatus)).iterator()); + Mockito.doThrow(new IOException()).when(mSyncProcess).performSync(any(), any()); + Pair result = mTaskTracker.checkTask(mMetadataSyncHandler, + new AlluxioURI("/"), new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING); + assertFalse(result.getFirst()); + assertThrows(IOException.class, () -> result.getSecond().waitComplete(WAIT_TIMEOUT)); + TaskStats stats = result.getSecond().getTaskInfo().getStats(); + checkStats(stats, -1, -1, 0, 2, false, true, true); + } + } + + @Test + public void blockingSyncTest() throws Throwable { + // run two concurrent processing syncing on the same path + // be sure one is blocked and they both succeed + for (int i = 0; i < 2; i++) { + mUfsClient.setResult(Collections.singletonList(Stream.of(mFileStatus)).iterator()); + Semaphore blocker = new Semaphore(0); + Mockito.doAnswer(ans -> { + // block the processing of any task + blocker.acquire(); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(), any()); + // Submit two concurrent tasks on the same path + Future> task1 = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING)); + assertThrows(TimeoutException.class, () -> task1.get(1, TimeUnit.SECONDS)); + Future> task2 = mThreadPool.submit(() -> + mTaskTracker.checkTask(mMetadataSyncHandler, new AlluxioURI("/"), + new AlluxioURI("/"), null, + DescendantType.ONE, 0, DirectoryLoadType.SINGLE_LISTING)); + assertThrows(TimeoutException.class, () -> task2.get(1, TimeUnit.SECONDS)); + // Let one task be processed + blocker.release(); + // Only one task should have been executed, but both should finish since they + // were on the same path + 
assertTrue(task1.get().getFirst()); + assertTrue(task2.get().getFirst()); + TaskStats stats1 = task1.get().getSecond().getTaskInfo().getStats(); + checkStats(stats1, 1, 1, 0, 1, false, false, true); + TaskStats stats2 = task2.get().getSecond().getTaskInfo().getStats(); + checkStats(stats2, 1, 1, 0, 1, false, false, true); + } + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java new file mode 100644 index 000000000000..7c6a7cdcc808 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/TestSyncProcessor.java @@ -0,0 +1,101 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import alluxio.exception.AccessControlException; +import alluxio.exception.BlockInfoException; +import alluxio.exception.DirectoryNotEmptyException; +import alluxio.exception.FileAlreadyExistsException; +import alluxio.exception.FileDoesNotExistException; +import alluxio.exception.InvalidPathException; +import alluxio.master.file.DefaultFileSystemMaster; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.master.metastore.ReadOnlyInodeStore; + +import java.io.IOException; +import java.util.concurrent.Semaphore; +import javax.annotation.Nullable; + +/** + * The metadata syncer. + */ +public class TestSyncProcessor extends DefaultSyncProcess { + @FunctionalInterface + public interface Callback { + void apply() throws Exception; + } + + @FunctionalInterface + public interface SyncOneCallback { + void apply(SyncProcessContext context) throws Exception; + } + + public TestSyncProcessor(DefaultFileSystemMaster fsMaster, ReadOnlyInodeStore inodeStore, + MountTable mountTable, InodeTree inodeTree, + UfsSyncPathCache syncPathCache, UfsAbsentPathCache absentPathCache) { + super(fsMaster, inodeStore, mountTable, inodeTree, syncPathCache, absentPathCache); + } + + Semaphore mLock = new Semaphore(0); + private int mBlockOnNth = -1; + private int mSyncCount = 0; + private Callback mCallback = null; + private SyncOneCallback mCallbackBeforePerformSyncOne = null; + + @Override + protected SingleInodeSyncResult performSyncOne(SyncProcessState syncState, + @Nullable UfsItem currentUfsStatus, + @Nullable InodeIterationResult currentInode) + throws InvalidPathException, FileDoesNotExistException, FileAlreadyExistsException, + IOException, BlockInfoException, DirectoryNotEmptyException, AccessControlException { + if 
(mCallbackBeforePerformSyncOne != null) { + try { + mCallbackBeforePerformSyncOne.apply(syncState.mContext); + } catch (Exception e) { + throw new RuntimeException(); + } + } + mSyncCount++; + if (mSyncCount == mBlockOnNth && mCallback != null) { + try { + mCallback.apply(); + } catch (Exception e) { + throw new RuntimeException(); + } + mLock.release(); + } + return super.performSyncOne(syncState, currentUfsStatus, currentInode); + } + + public synchronized void beforePerformSyncOne(SyncOneCallback callback) + throws InterruptedException { + mCallbackBeforePerformSyncOne = callback; + } + + /** + * Blocks the current thread until the nth inode sync (root included) is ABOUT TO execute, + * executes the callback and resumes the sync. + * Used for testing concurrent modifications. + * @param nth the inode sync count + * @param callback the callback to execute + */ + public synchronized void blockUntilNthSyncThenDo(int nth, Callback callback) + throws InterruptedException { + mBlockOnNth = nth; + mCallback = callback; + mLock.acquire(); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java b/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java new file mode 100644 index 000000000000..932ba9dbf94d --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/file/mdsync/UfsLoadsTest.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.master.file.mdsync; + +import static org.junit.Assert.assertFalse; +import static org.mockito.ArgumentMatchers.any; + +import alluxio.AlluxioURI; +import alluxio.master.file.meta.UfsAbsentPathCache; +import alluxio.master.file.meta.UfsSyncPathCache; +import alluxio.resource.CloseableResource; +import alluxio.underfs.UfsClient; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsStatus; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class UfsLoadsTest { + ExecutorService mThreadPool; + TaskTracker mTaskTracker; + MetadataSyncHandler mMetadataSyncHandler; + MockUfsClient mUfsClient; + UfsSyncPathCache mUfsSyncPathCache; + UfsAbsentPathCache mAbsentPathCache; + SyncProcess mSyncProcess; + List mProcessedItems; + UfsStatus mFileStatus = new UfsFileStatus("file", "", + 0L, 0L, "", "", (short) 0, 0L); + UfsStatus mDirStatus = new UfsDirectoryStatus("dir", "", "", (short) 0); + static final long WAIT_TIMEOUT = 5_000; + + private CloseableResource getClient(AlluxioURI ignored) { + return new CloseableResource(mUfsClient) { + @Override + public void closeResource() { + } + }; + } + + @Before + public void before() throws Throwable { + mThreadPool = Executors.newCachedThreadPool(); + mUfsClient = Mockito.spy(new MockUfsClient()); + mSyncProcess = Mockito.spy(new DummySyncProcess()); + mProcessedItems = new ArrayList<>(); + Mockito.doAnswer(ans -> { + LoadResult result = ans.getArgument(0); + result.getUfsLoadResult().getItems().peek(mProcessedItems::add); + return ans.callRealMethod(); + }).when(mSyncProcess).performSync(any(LoadResult.class), any(UfsSyncPathCache.class)); + mAbsentPathCache = Mockito.mock(UfsAbsentPathCache.class); + mUfsSyncPathCache = 
Mockito.mock(UfsSyncPathCache.class); + mTaskTracker = new TaskTracker( + 1, 1, false, false, + mUfsSyncPathCache, mAbsentPathCache, mSyncProcess, this::getClient); + mMetadataSyncHandler = new MetadataSyncHandler(mTaskTracker, null, null); + } + + @After + public void after() throws Throwable { + assertFalse(mTaskTracker.hasRunningTasks()); + mTaskTracker.close(); + mThreadPool.shutdown(); + } + + @Test + public void singleFileSync() { + mUfsClient.setGetStatusItem(mFileStatus); + } +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java index 72ef4e72f4a3..320f9c287a3f 100644 --- a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTest.java @@ -18,93 +18,36 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; -import alluxio.AlluxioTestDirectory; import alluxio.ConfigurationRule; -import alluxio.concurrent.LockMode; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.master.file.contexts.CreateDirectoryContext; -import alluxio.master.file.contexts.CreateFileContext; -import alluxio.master.file.meta.Edge; import alluxio.master.file.meta.Inode; import alluxio.master.file.meta.InodeLockManager; -import alluxio.master.file.meta.InodeView; import alluxio.master.file.meta.MutableInode; import alluxio.master.file.meta.MutableInodeDirectory; import alluxio.master.file.meta.MutableInodeFile; import alluxio.master.metastore.InodeStore.WriteBatch; import alluxio.master.metastore.caching.CachingInodeStore; -import alluxio.master.metastore.heap.HeapInodeStore; import alluxio.master.metastore.rocks.RocksInodeStore; import alluxio.resource.CloseableIterator; -import alluxio.resource.LockResource; -import com.google.common.collect.ImmutableMap; -import io.netty.util.ResourceLeakDetector; -import 
org.junit.After; -import org.junit.Before; -import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; import org.rocksdb.RocksDBException; import java.io.File; import java.nio.charset.Charset; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Optional; import java.util.function.Function; @RunWith(Parameterized.class) -public class InodeStoreTest { - private static final int CACHE_SIZE = 16; - private static String sDir; - private static final String CONF_NAME = "/rocks-inode.ini"; - - @Parameters - public static Iterable> parameters() throws Exception { - sDir = - AlluxioTestDirectory.createTemporaryDirectory("inode-store-test").getAbsolutePath(); - File confFile = new File(sDir + CONF_NAME); - writeStringToFile(confFile, ROCKS_CONFIG, (Charset) null); - - return Arrays.asList( - lockManager -> new HeapInodeStore(), - lockManager -> new RocksInodeStore(sDir), - lockManager -> new CachingInodeStore(new RocksInodeStore(sDir), lockManager)); - } - - @Rule - public ConfigurationRule mConf = new ConfigurationRule( - ImmutableMap.of(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, CACHE_SIZE, - PropertyKey.MASTER_METASTORE_INODE_CACHE_EVICT_BATCH_SIZE, 5, - PropertyKey.LEAK_DETECTOR_LEVEL, ResourceLeakDetector.Level.PARANOID, - PropertyKey.LEAK_DETECTOR_EXIT_ON_LEAK, true), - Configuration.modifiableGlobal()); - - private final MutableInodeDirectory mRoot = inodeDir(0, -1, ""); - - private final Function mStoreBuilder; - private InodeStore mStore; - private InodeLockManager mLockManager; - +public class InodeStoreTest extends InodeStoreTestBase { public InodeStoreTest(Function store) { - mStoreBuilder = store; - } - - @Before - public void before() { - mLockManager = new InodeLockManager(); - mStore = mStoreBuilder.apply(mLockManager); - } - - @After - public void after() { - 
mStore.close(); + super(store); } @Test @@ -304,63 +247,4 @@ public void manyOperations() { assertEquals(0, CloseableIterator.size(mStore.getChildren(mStore.get(middleDir - 1).get().asDirectory()))); } - - private void writeInode(MutableInode inode) { - try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { - mStore.writeInode(inode); - } - } - - private void writeEdge(MutableInode parent, MutableInode child) { - try (LockResource lr = - mLockManager.lockEdge(new Edge(parent.getId(), child.getName()), - LockMode.WRITE, false)) { - mStore.addChild(parent.getId(), child); - } - } - - private void removeInode(InodeView inode) { - try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { - mStore.remove(inode); - } - } - - private void removeParentEdge(InodeView child) { - try (LockResource lr = mLockManager - .lockEdge(new Edge(child.getParentId(), child.getName()), LockMode.WRITE, false)) { - mStore.removeChild(child.getParentId(), child.getName()); - } - } - - private static MutableInodeDirectory inodeDir(long id, long parentId, String name) { - return MutableInodeDirectory.create(id, parentId, name, CreateDirectoryContext.defaults()); - } - - private static MutableInodeFile inodeFile(long containerId, long parentId, String name) { - return MutableInodeFile.create(containerId, parentId, name, 0, CreateFileContext.defaults()); - } - - // RocksDB configuration options used for the unit tests - private static final String ROCKS_CONFIG = "[Version]\n" - + " rocksdb_version=7.0.3\n" - + " options_file_version=1.1\n" - + "\n" - + "[DBOptions]\n" - + " create_missing_column_families=true\n" - + " create_if_missing=true\n" - + "\n" - + "\n" - + "[CFOptions \"default\"]\n" - + "\n" - + " \n" - + "[TableOptions/BlockBasedTable \"default\"]\n" - + "\n" - + "\n" - + "[CFOptions \"inodes\"]\n" - + " \n" - + "[TableOptions/BlockBasedTable \"inodes\"]\n" - + " \n" - + "\n" - + "[CFOptions \"edges\"]\n" - + " \n"; } diff --git 
a/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java new file mode 100644 index 000000000000..3d7ab1abe0e4 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/InodeStoreTestBase.java @@ -0,0 +1,151 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import static org.apache.commons.io.FileUtils.writeStringToFile; + +import alluxio.AlluxioTestDirectory; +import alluxio.ConfigurationRule; +import alluxio.concurrent.LockMode; +import alluxio.conf.Configuration; +import alluxio.conf.PropertyKey; +import alluxio.master.file.contexts.CreateDirectoryContext; +import alluxio.master.file.contexts.CreateFileContext; +import alluxio.master.file.meta.Edge; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.InodeView; +import alluxio.master.file.meta.MutableInode; +import alluxio.master.file.meta.MutableInodeDirectory; +import alluxio.master.file.meta.MutableInodeFile; +import alluxio.master.metastore.caching.CachingInodeStore; +import alluxio.master.metastore.heap.HeapInodeStore; +import alluxio.master.metastore.rocks.RocksInodeStore; +import alluxio.resource.LockResource; + +import com.google.common.collect.ImmutableMap; +import io.netty.util.ResourceLeakDetector; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.runners.Parameterized.Parameters; + +import 
java.io.File; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.function.Function; + +public class InodeStoreTestBase { + protected static final int CACHE_SIZE = 16; + protected static String sDir; + protected static final String CONF_NAME = "/rocks-inode.ini"; + + @Parameters + public static Iterable> parameters() throws Exception { + sDir = + AlluxioTestDirectory.createTemporaryDirectory("inode-store-test").getAbsolutePath(); + File confFile = new File(sDir + CONF_NAME); + writeStringToFile(confFile, ROCKS_CONFIG, (Charset) null); + + return Arrays.asList( + lockManager -> new HeapInodeStore(), + lockManager -> new RocksInodeStore(sDir), + lockManager -> new CachingInodeStore(new RocksInodeStore(sDir), lockManager)); + } + + @Rule + public ConfigurationRule mConf = new ConfigurationRule( + ImmutableMap.of(PropertyKey.MASTER_METASTORE_INODE_CACHE_MAX_SIZE, CACHE_SIZE, + PropertyKey.MASTER_METASTORE_INODE_CACHE_EVICT_BATCH_SIZE, 5, + PropertyKey.LEAK_DETECTOR_LEVEL, ResourceLeakDetector.Level.PARANOID, + PropertyKey.LEAK_DETECTOR_EXIT_ON_LEAK, true), + Configuration.modifiableGlobal()); + + protected final MutableInodeDirectory mRoot = inodeDir(0, -1, ""); + + protected final Function mStoreBuilder; + protected InodeStore mStore; + protected InodeLockManager mLockManager; + + public InodeStoreTestBase(Function store) { + mStoreBuilder = store; + } + + @Before + public void before() { + mLockManager = new InodeLockManager(); + mStore = mStoreBuilder.apply(mLockManager); + } + + @After + public void after() { + mStore.close(); + } + + protected void writeInode(MutableInode inode) { + try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { + mStore.writeInode(inode); + } + } + + protected void writeEdge(MutableInode parent, MutableInode child) { + try (LockResource lr = + mLockManager.lockEdge(new Edge(parent.getId(), child.getName()), + LockMode.WRITE, false)) { + mStore.addChild(parent.getId(), child); + } + } + + 
protected void removeInode(InodeView inode) { + try (LockResource lr = mLockManager.lockInode(inode, LockMode.WRITE, false)) { + mStore.remove(inode); + } + } + + protected void removeParentEdge(InodeView child) { + try (LockResource lr = mLockManager + .lockEdge(new Edge(child.getParentId(), child.getName()), LockMode.WRITE, false)) { + mStore.removeChild(child.getParentId(), child.getName()); + } + } + + protected static MutableInodeDirectory inodeDir(long id, long parentId, String name) { + return MutableInodeDirectory.create(id, parentId, name, CreateDirectoryContext.defaults()); + } + + protected static MutableInodeFile inodeFile(long containerId, long parentId, String name) { + return MutableInodeFile.create(containerId, parentId, name, 0, CreateFileContext.defaults()); + } + + // RocksDB configuration options used for the unit tests + private static final String ROCKS_CONFIG = "[Version]\n" + + " rocksdb_version=7.0.3\n" + + " options_file_version=1.1\n" + + "\n" + + "[DBOptions]\n" + + " create_missing_column_families=true\n" + + " create_if_missing=true\n" + + "\n" + + "\n" + + "[CFOptions \"default\"]\n" + + "\n" + + " \n" + + "[TableOptions/BlockBasedTable \"default\"]\n" + + "\n" + + "\n" + + "[CFOptions \"inodes\"]\n" + + " \n" + + "[TableOptions/BlockBasedTable \"inodes\"]\n" + + " \n" + + "\n" + + "[CFOptions \"edges\"]\n" + + " \n"; +} diff --git a/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java b/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java new file mode 100644 index 000000000000..871b9b7b7803 --- /dev/null +++ b/core/server/master/src/test/java/alluxio/master/metastore/RecursiveInodeIteratorTest.java @@ -0,0 +1,415 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). 
You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.master.metastore; + +import static org.junit.Assert.assertEquals; + +import alluxio.AlluxioURI; +import alluxio.file.options.DescendantType; +import alluxio.master.block.ContainerIdGenerable; +import alluxio.master.file.meta.InodeDirectoryIdGenerator; +import alluxio.master.file.meta.InodeIterationResult; +import alluxio.master.file.meta.InodeLockManager; +import alluxio.master.file.meta.InodeTree; +import alluxio.master.file.meta.LockedInodePath; +import alluxio.master.file.meta.LockingScheme; +import alluxio.master.file.meta.MountTable; +import alluxio.master.file.meta.MutableInode; +import alluxio.master.file.meta.options.MountInfo; +import alluxio.master.journal.NoopJournalContext; +import alluxio.underfs.UfsManager; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import java.time.Clock; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +@RunWith(Parameterized.class) +public class RecursiveInodeIteratorTest extends InodeStoreTestBase { + + public RecursiveInodeIteratorTest(Function store) { + super(store); + } + + MutableInode mInodeA = inodeDir(1, 0, "a"); + MutableInode mInodeAB = inodeDir(2, 1, "b"); + MutableInode mInodeABC = inodeDir(3, 2, "c"); + MutableInode mInodeABCF1 = inodeFile(4, 3, "f1"); + MutableInode mInodeABCF2 = inodeFile(5, 3, "f2"); + MutableInode mInodeAC = inodeDir(6, 1, "c"); + MutableInode mInodeACF1 = inodeFile(7, 6, "f1"); + MutableInode mInodeACF2 = inodeFile(8, 
6, "f2"); + MutableInode mInodeACF3 = inodeFile(9, 6, "f3"); + MutableInode mInodeAF1 = inodeFile(10, 1, "f1"); + MutableInode mInodeB = inodeDir(11, 0, "b"); + MutableInode mInodeC = inodeDir(12, 0, "c"); + MutableInode mInodeF1 = inodeFile(13, 0, "f1"); + MutableInode mInodeF2 = inodeFile(14, 0, "f2"); + MutableInode mInodeG = inodeDir(15, 0, "g"); + + /* + / + /a + /a/b + /a/b/c + /a/b/c/f1 + /a/b/c/f2 + /a/c + /a/c/f1 + /a/c/f2 + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + private void createInodeTree() { + writeInode(mRoot); + writeInode(mInodeA); + writeInode(mInodeAB); + writeInode(mInodeABC); + writeInode(mInodeABCF1); + writeInode(mInodeABCF2); + writeInode(mInodeAC); + writeInode(mInodeACF1); + writeInode(mInodeACF2); + writeInode(mInodeACF3); + writeInode(mInodeAF1); + writeInode(mInodeB); + writeInode(mInodeC); + writeInode(mInodeF1); + writeInode(mInodeF2); + writeInode(mInodeG); + + writeEdge(mRoot, mInodeA); + writeEdge(mInodeA, mInodeAB); + writeEdge(mInodeAB, mInodeABC); + writeEdge(mInodeABC, mInodeABCF1); + writeEdge(mInodeABC, mInodeABCF2); + writeEdge(mInodeA, mInodeAC); + writeEdge(mInodeAC, mInodeACF1); + writeEdge(mInodeAC, mInodeACF2); + writeEdge(mInodeAC, mInodeACF3); + writeEdge(mInodeA, mInodeAF1); + writeEdge(mRoot, mInodeB); + writeEdge(mRoot, mInodeC); + writeEdge(mRoot, mInodeF1); + writeEdge(mRoot, mInodeF2); + writeEdge(mRoot, mInodeG); + } + + @Test + public void recursiveListing() throws Exception { + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeABC, mInodeABCF1, mInodeABCF2, mInodeAC, mInodeACF1, + mInodeACF2, mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/b/c", + "/a/b/c/f1", + "/a/b/c/f2", + "/a/c", + "/a/c/f1", + "/a/c/f2", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + 
Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator(ReadOption.defaults(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingSkipChildren() throws Exception { + /* + / + /a + /a/b -> SKIP CHILDREN + /a/b/c (SKIPPED) + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 (SKIPPED) + /a/c -> SKIP CHILDREN + /a/c/f1 (SKIPPED) + /a/c/f2 (SKIPPED) + /a/c/f3 (SKIPPED) + /a/f1 + /b -> SKIP CHILDREN + /c + /f1 + /f2 + /g -> SKIP CHILDREN + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeAC, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/c", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + 
tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator(ReadOption.defaults(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + // The locked inode path will become stale after skipChildrenOfTheCurrent() is called. + if (result.getLockedPath().getUri().getPath().equals("/a/b") + || result.getLockedPath().getUri().getPath().equals("/b") + || result.getLockedPath().getUri().getPath().equals("/a/c") + || result.getLockedPath().getUri().getPath().equals("/g")) { + iterator.skipChildrenOfTheCurrent(); + } + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFrom1() throws Exception { + /* + / + /a + /a/b + /a/b/c + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 + /a/c + /a/c/f1 + /a/c/f2 + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAB, mInodeABC, mInodeABCF2, mInodeAC, mInodeACF1, mInodeACF2, + mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/b", + "/a/b/c", + "/a/b/c/f2", + "/a/c", + "/a/c/f1", + "/a/c/f2", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try 
(LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("a/b/c/f11").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFrom2() throws Exception { + /* + / + /a + /a/b (SKIPPED) + /a/b/c (SKIPPED) + /a/b/c/f1 (SKIPPED) + /a/b/c/f2 (SKIPPED) + /a/c + /a/c/f1 (SKIPPED) + /a/c/f2 (SKIPPED) + /a/c/f3 + /a/f1 + /b + /c + /f1 + /f2 + /g + */ + + createInodeTree(); + + List> inodes = Arrays.asList( + mRoot, mInodeA, mInodeAC, mInodeACF3, mInodeAF1, mInodeB, mInodeC, mInodeF1, mInodeF2, + mInodeG + ); + + List paths = Arrays.asList( + "/", + "/a", + "/a/c", + "/a/c/f3", + "/a/f1", + "/b", + "/c", + "/f1", + "/f2", + "/g" + ); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("a/c/f3").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + 
assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } + + @Test + public void recursiveListingStartFromSkipAll() throws Exception { + createInodeTree(); + + List> inodes = Collections.singletonList(mRoot); + + List paths = Collections.singletonList("/"); + + InodeTree tree = new InodeTree(mStore, Mockito.mock(ContainerIdGenerable.class), + Mockito.mock(InodeDirectoryIdGenerator.class), new MountTable( + Mockito.mock(UfsManager.class), Mockito.mock(MountInfo.class), Clock.systemUTC()), + mLockManager); + + LockingScheme lockingScheme = new LockingScheme(new AlluxioURI("/"), + InodeTree.LockPattern.READ, false); + int idx = 0; + try (LockedInodePath lockedPath = + tree.lockInodePath(lockingScheme, NoopJournalContext.INSTANCE)) { + RecursiveInodeIterator iterator = (RecursiveInodeIterator) + mStore.getSkippableChildrenIterator( + ReadOption.newBuilder().setReadFrom("z").build(), + DescendantType.ALL, true, lockedPath); + while (iterator.hasNext()) { + InodeIterationResult result = iterator.next(); + assertEquals(paths.get(idx), result.getLockedPath().getUri().getPath()); + result.getLockedPath().traverse(); + assertEquals(inodes.get(idx).getId(), result.getInode().getId()); + assertEquals(inodes.get(idx).getId(), result.getLockedPath().getInode().getId()); + idx++; + } + iterator.close(); + } + } +} diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 5dde330eb43a..35db38e889e9 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -279,7 +279,7 @@ message ListStatusPartialPOptions { // the ListStatusPartialPRequest is a prefix of startAfter (e.g. 
if listing "/dir", // then startAfter could be "/dir/next"). Otherwise if start after does not start with "/", // then startAfter is appended to the path given in the ListStatusPartialPRequest - // (e.g. if the listing path is "/dir" and startAfter is "/after" then files that + // (e.g. if the listing path is "/dir" and startAfter is "after" then files that // start after "/dir/after" in lexicographic order will be listed). // The full path itself does not need to exist. // This offset type is recommended to use if POSIX compatible listings are needed. @@ -664,6 +664,72 @@ message GetJobProgressPResponse { optional JobProgressReportFormat format = 2; } +message SyncMetadataPOptions { + optional fscommon.LoadDescendantPType loadDescendantType = 1; + optional fscommon.DirectoryLoadPType directoryLoadType = 2; +} + +message SyncMetadataPRequest { + required string path = 1; + optional SyncMetadataPOptions options = 2; +} + +enum SyncMetadataState { + UNKNOWN = 0; + RUNNING = 1; + SUCCEEDED = 2; + FAILED = 3; + CANCELED = 4; +} + +message SyncMetadataTask { + message Exception { + optional string exceptionType = 1; + optional string exceptionMessage = 2; + optional string stacktrace = 3; + } + + optional int64 id = 1; + optional SyncMetadataState state = 2; + optional int64 syncDurationMs = 3; + optional Exception exception = 4; + optional int64 successOpCount = 5; + + optional string taskInfoString = 100; + optional string taskStatString = 101; + +} + +message SyncMetadataPResponse { + repeated SyncMetadataTask task = 1; + + optional string debugInfo = 1000; +} + +message SyncMetadataAsyncPResponse { + optional bool submitted = 1; + optional int64 taskGroupId = 2; + repeated int64 taskIds = 3; +} + +message GetSyncProgressPRequest { + optional int64 taskGroupId = 1; +} + +message GetSyncProgressPResponse { + repeated SyncMetadataTask task = 1; + + optional string debugInfo = 1000; +} + +message CancelSyncMetadataPRequest { + optional int64 taskGroupId = 1; +} + 
+message CancelSyncMetadataPResponse { + optional bool success = 1; +} + /** * This interface contains file system master service endpoints for Alluxio clients. */ @@ -818,6 +884,14 @@ service FileSystemMasterClientService { rpc StopJob(StopJobPRequest) returns (StopJobPResponse); rpc GetJobProgress(GetJobProgressPRequest) returns (GetJobProgressPResponse); + + /** + * Load metadata from up into Alluxio. + */ + rpc SyncMetadata(SyncMetadataPRequest) returns (SyncMetadataPResponse); + rpc SyncMetadataAsync(SyncMetadataPRequest) returns (SyncMetadataAsyncPResponse); + rpc GetSyncProgress(GetSyncProgressPRequest) returns (GetSyncProgressPResponse); + rpc CancelSyncMetadata(CancelSyncMetadataPRequest) returns (CancelSyncMetadataPResponse); } message FileSystemHeartbeatPResponse { diff --git a/core/transport/src/main/proto/grpc/fscommon.proto b/core/transport/src/main/proto/grpc/fscommon.proto index 458b5e72df1d..d9d8510242b4 100644 --- a/core/transport/src/main/proto/grpc/fscommon.proto +++ b/core/transport/src/main/proto/grpc/fscommon.proto @@ -11,3 +11,9 @@ enum LoadDescendantPType { ONE = 1; ALL = 2; } + +enum DirectoryLoadPType { + SINGLE_LISTING = 0; + BFS = 1; + DFS = 2; +} diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index f41422aa1781..78251d3512cf 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -2141,6 +2141,30 @@ "integer": 2 } ] + }, + { + "name": "SyncMetadataState", + "enum_fields": [ + { + "name": "UNKNOWN" + }, + { + "name": "RUNNING", + "integer": 1 + }, + { + "name": "SUCCEEDED", + "integer": 2 + }, + { + "name": "FAILED", + "integer": 3 + }, + { + "name": "CANCELED", + "integer": 4 + } + ] } ], "messages": [ @@ -3998,6 +4022,181 @@ } ] }, + { + "name": "SyncMetadataPOptions", + "fields": [ + { + "id": 1, + "name": "loadDescendantType", + "type": "fscommon.LoadDescendantPType" + }, + { + "id": 2, + "name": "directoryLoadType", + "type": 
"fscommon.DirectoryLoadPType" + } + ] + }, + { + "name": "SyncMetadataPRequest", + "fields": [ + { + "id": 1, + "name": "path", + "type": "string" + }, + { + "id": 2, + "name": "options", + "type": "SyncMetadataPOptions" + } + ] + }, + { + "name": "SyncMetadataTask", + "fields": [ + { + "id": 1, + "name": "id", + "type": "int64" + }, + { + "id": 2, + "name": "state", + "type": "SyncMetadataState" + }, + { + "id": 3, + "name": "syncDurationMs", + "type": "int64" + }, + { + "id": 4, + "name": "exception", + "type": "Exception" + }, + { + "id": 5, + "name": "successOpCount", + "type": "int64" + }, + { + "id": 100, + "name": "taskInfoString", + "type": "string" + }, + { + "id": 101, + "name": "taskStatString", + "type": "string" + } + ], + "messages": [ + { + "name": "Exception", + "fields": [ + { + "id": 1, + "name": "exceptionType", + "type": "string" + }, + { + "id": 2, + "name": "exceptionMessage", + "type": "string" + }, + { + "id": 3, + "name": "stacktrace", + "type": "string" + } + ] + } + ] + }, + { + "name": "SyncMetadataPResponse", + "fields": [ + { + "id": 1, + "name": "task", + "type": "SyncMetadataTask", + "is_repeated": true + }, + { + "id": 1000, + "name": "debugInfo", + "type": "string" + } + ] + }, + { + "name": "SyncMetadataAsyncPResponse", + "fields": [ + { + "id": 1, + "name": "submitted", + "type": "bool" + }, + { + "id": 2, + "name": "taskGroupId", + "type": "int64" + }, + { + "id": 3, + "name": "taskIds", + "type": "int64", + "is_repeated": true + } + ] + }, + { + "name": "GetSyncProgressPRequest", + "fields": [ + { + "id": 1, + "name": "taskGroupId", + "type": "int64" + } + ] + }, + { + "name": "GetSyncProgressPResponse", + "fields": [ + { + "id": 1, + "name": "task", + "type": "SyncMetadataTask", + "is_repeated": true + }, + { + "id": 1000, + "name": "debugInfo", + "type": "string" + } + ] + }, + { + "name": "CancelSyncMetadataPRequest", + "fields": [ + { + "id": 1, + "name": "taskGroupId", + "type": "int64" + } + ] + }, + { + "name": 
"CancelSyncMetadataPResponse", + "fields": [ + { + "id": 1, + "name": "success", + "type": "bool" + } + ] + }, { "name": "FileSystemHeartbeatPResponse", "fields": [ @@ -4280,6 +4479,26 @@ "name": "GetJobProgress", "in_type": "GetJobProgressPRequest", "out_type": "GetJobProgressPResponse" + }, + { + "name": "SyncMetadata", + "in_type": "SyncMetadataPRequest", + "out_type": "SyncMetadataPResponse" + }, + { + "name": "SyncMetadataAsync", + "in_type": "SyncMetadataPRequest", + "out_type": "SyncMetadataAsyncPResponse" + }, + { + "name": "GetSyncProgress", + "in_type": "GetSyncProgressPRequest", + "out_type": "GetSyncProgressPResponse" + }, + { + "name": "CancelSyncMetadata", + "in_type": "CancelSyncMetadataPRequest", + "out_type": "CancelSyncMetadataPResponse" } ] }, @@ -4373,6 +4592,22 @@ "integer": 2 } ] + }, + { + "name": "DirectoryLoadPType", + "enum_fields": [ + { + "name": "SINGLE_LISTING" + }, + { + "name": "BFS", + "integer": 1 + }, + { + "name": "DFS", + "integer": 2 + } + ] } ], "package": { diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index 80fd1c16d8ab..efbcfa8cdd59 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -1153,6 +1153,9 @@ $ ./bin/alluxio fs load --local The `loadMetadata` command loads metadata about a path in the UFS to Alluxio. No data will be transferred. + +#### loadMetadata V1(legacy) + This command is a client-side optimization without storing all returned `ls` results, preventing OOM for massive amount of small files. This is useful when data has been added to the UFS outside of Alluxio and users are expected to reference the new data. This command is more efficient than using the `ls` command since it does not store any directory or file information to be returned. 
@@ -1167,6 +1170,39 @@ The -F option will force the loading of metadata even if there are existing meta ``` $ ./bin/alluxio fs loadMetadata -R -F ``` +#### loadMetadata V2(new) + +The load metadata v2 is a better implementation of metadata sync that is designed for object storage (e.g. s3), +with better resource control and performance. +To use the v2 implementation, please attach the option `-v2` in your command. `-F` is no longer supported in v2. +The command will always load the metadata from UFS. If files are in alluxio already, they will be compared with and updated based on the UFS result. +The v2 implementation also has some unique options: +```console +$ ./bin/alluxio fs loadMetadata -v2 -R -d -a +``` + +Options: +* `-d ` option that determines how alluxio will load metadata of subdirectories, if a recursive loading is required. Possible values: + * SINGLE_LISTING (default): Loads the file infos from UFS using a single listing. Use this mode if the directory does not contain or only contains few subdirectories. This mode gives you better reliability. This mode is only allowed on some object storage where single listing is allowed (e.g. ListObjectsV2 in s3). + * BFS: Loads the file infos on a directory basis; Creates a new job to load the subdirectory; Use this mode if your UFS directory contains many subdirectories. This mode loads the metadata for each subdirectory concurrently and gives you the best performance. Note that this is only an approximate BFS, as batches are processed and loaded concurrently and may be loaded in different orders. + * DFS: Loads the file infos directory by directory, in a DFS way. Note that this is only an approximate DFS, as batches are processed and loaded concurrently and may be loaded in different orders. +* `-R` option recursively loads metadata in subdirectories +* `-a/--async` If specified, the metadata loading states are pulled and printed every couple of seconds until the sync job is finished.
Otherwise, the command line is blocked until the sync job is finished. Note that regardless of whether this option is specified or not, the metadata sync task is processed asynchronously by alluxio master and this option only changes the behavior of display. Hence closing the terminal or CTRL+C does not cancel the sync job. + +If `-a` is used, the console will print the task group id when the task is submitted. A task will be created for each mount point in the sync root. +One can use the task group id to get the metadata load progress or cancel the load. + +To get the status of a task group, use +```console +$ ./bin/alluxio fs loadMetadata -v2 -o get -id +``` + +To cancel the task group, use +```console +$ ./bin/alluxio fs loadMetadata -v2 -o cancel -id +``` + + ### location The `location` command returns the addresses of all the Alluxio workers which contain blocks diff --git a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java index b07449ec7511..0f7592f69f21 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java +++ b/integration/fuse/src/test/java/alluxio/fuse/auth/AbstractAuthPolicyTest.java @@ -24,6 +24,7 @@ import alluxio.exception.AlluxioException; import alluxio.exception.FileDoesNotExistException; import alluxio.fuse.AlluxioFuseUtils; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import alluxio.grpc.CreateDirectoryPOptions; import alluxio.grpc.CreateFilePOptions; @@ -31,6 +32,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -41,6 +43,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions;
+import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UnmountPOptions; import alluxio.jnifuse.FuseFileSystem; import alluxio.jnifuse.struct.FuseContext; @@ -315,6 +320,31 @@ public String getJobProgress(JobDescription jobDescription, throw new UnsupportedOperationException(); } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, + SyncMetadataPOptions options) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) + throws FileDoesNotExistException, IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws IOException, AlluxioException { + throw new UnsupportedOperationException(); + } + @Override public void close() throws IOException { throw new UnsupportedOperationException(); diff --git a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java index cceb7c7ee042..6662ca4b843a 100644 --- a/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java +++ b/integration/fuse/src/test/java/alluxio/fuse/cli/MockFuseFileSystemMasterClient.java @@ -17,6 +17,7 @@ import alluxio.client.file.URIStatus; import alluxio.exception.status.AlluxioStatusException; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.CancelSyncMetadataPResponse; import alluxio.grpc.CheckAccessPOptions; import 
alluxio.grpc.CheckConsistencyPOptions; import alluxio.grpc.CompleteFilePOptions; @@ -26,6 +27,7 @@ import alluxio.grpc.ExistsPOptions; import alluxio.grpc.FreePOptions; import alluxio.grpc.GetStatusPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.JobProgressReportFormat; import alluxio.grpc.ListStatusPOptions; import alluxio.grpc.ListStatusPartialPOptions; @@ -35,6 +37,9 @@ import alluxio.grpc.SetAclAction; import alluxio.grpc.SetAclPOptions; import alluxio.grpc.SetAttributePOptions; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; import alluxio.grpc.UpdateUfsModePOptions; import alluxio.job.JobDescription; import alluxio.job.JobRequest; @@ -221,6 +226,29 @@ public String getJobProgress(JobDescription jobDescription, return null; } + @Override + public SyncMetadataPResponse syncMetadata(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public SyncMetadataAsyncPResponse syncMetadataAsync(AlluxioURI path, SyncMetadataPOptions options) + throws AlluxioStatusException { + return null; + } + + @Override + public GetSyncProgressPResponse getSyncProgress(long taskGroupId) throws AlluxioStatusException { + return null; + } + + @Override + public CancelSyncMetadataPResponse cancelSyncMetadata(long taskGroupId) + throws AlluxioStatusException { + return null; + } + @Override public void connect() throws IOException { } diff --git a/pom.xml b/pom.xml index b0d98b1e8371..ecabdab22388 100644 --- a/pom.xml +++ b/pom.xml @@ -125,6 +125,8 @@ 4.2.0 1.11.815 + + 2.16.104 build 1.2.1 2.3.13 @@ -171,7 +173,7 @@ 0.5.5 1.0.1 8.5.9 - 0.2.6 + 2.0.0 @@ -706,6 +708,13 @@ kerby-util ${kerby.version} + + software.amazon.awssdk + bom + ${awssdk.version} + pom + import + it.unimi.dsi fastutil-core @@ -719,10 +728,16 @@ test - io.findify - s3mock_2.13 - ${s3mock.version} + org.gaul + s3proxy + ${s3proxy.version} test 
+ + + ch.qos.logback + logback-classic + + io.grpc diff --git a/shell/src/main/java/alluxio/cli/fs/command/LoadMetadataCommand.java b/shell/src/main/java/alluxio/cli/fs/command/LoadMetadataCommand.java index bf8728f59f1b..39c61b5851b3 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/LoadMetadataCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/LoadMetadataCommand.java @@ -12,19 +12,33 @@ package alluxio.cli.fs.command; import alluxio.AlluxioURI; +import alluxio.Constants; import alluxio.annotation.PublicApi; import alluxio.cli.CommandUtils; import alluxio.client.file.FileSystemContext; import alluxio.exception.AlluxioException; import alluxio.exception.status.InvalidArgumentException; +import alluxio.grpc.DirectoryLoadPType; import alluxio.grpc.FileSystemMasterCommonPOptions; +import alluxio.grpc.GetSyncProgressPResponse; import alluxio.grpc.ListStatusPOptions; +import alluxio.grpc.LoadDescendantPType; +import alluxio.grpc.SyncMetadataAsyncPResponse; +import alluxio.grpc.SyncMetadataPOptions; +import alluxio.grpc.SyncMetadataPResponse; +import alluxio.grpc.SyncMetadataState; +import alluxio.grpc.SyncMetadataTask; +import alluxio.util.CommonUtils; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; import javax.annotation.concurrent.ThreadSafe; /** @@ -49,6 +63,53 @@ public class LoadMetadataCommand extends AbstractFileSystemCommand { .desc("update the metadata of the existing sub file forcibly") .build(); + private static final Option ASYNC_OPTION = + Option.builder("a") + .longOpt("async") + .required(false) + .hasArg(false) + .desc("load the metadata asynchronously") + .build(); + + private static final Option V2_OPTION = + Option.builder("v2") + .required(false) + .hasArg(false) + .desc("use the load metadata v2 implementation") + 
.build(); + + private static final Option DIR_LOAD_TYPE_OPTION = + Option.builder("d") + .required(false) + .hasArg() + .desc("load directory type, can be SINGLE_LISTING, BFS, or DFS") + .build(); + + private static final Option OPERATION_OPTION = + Option.builder("o") + .required(false) + .longOpt("option") + .hasArg() + .desc("operation, can be load, get, cancel") + .build(); + + private static final Option POLLING_OPTION = + Option.builder("p") + .required(false) + .longOpt("polling") + .hasArg() + .desc("when running a task asynchronously, how often to poll the task progress in ms") + .build(); + + private static final Option TASK_ID_OPTION = + Option.builder("id") + .required(false) + .hasArg() + .desc("the numeric task group id") + .build(); + + private final List mOperationValues = Arrays.asList("load", "get", "cancel"); + /** * Constructs a new instance to load metadata for the given Alluxio path from UFS. * @@ -67,25 +128,157 @@ public String getCommandName() { public Options getOptions() { return new Options() .addOption(RECURSIVE_OPTION) - .addOption(FORCE_OPTION); + .addOption(FORCE_OPTION) + .addOption(ASYNC_OPTION) + .addOption(DIR_LOAD_TYPE_OPTION) + .addOption(V2_OPTION) + .addOption(OPERATION_OPTION) + .addOption(POLLING_OPTION) + .addOption(TASK_ID_OPTION); } @Override protected void runPlainPath(AlluxioURI plainPath, CommandLine cl) throws AlluxioException, IOException { - loadMetadata(plainPath, cl.hasOption(RECURSIVE_OPTION.getOpt()), - cl.hasOption(FORCE_OPTION.getOpt())); + String operation = cl.getOptionValue(OPERATION_OPTION.getOpt(), "load"); + if (operation.equals("get")) { + getSyncProgress(Long.parseLong(cl.getOptionValue(TASK_ID_OPTION.getOpt()))); + } else if (operation.equals("cancel")) { + cancel(Long.parseLong(cl.getOptionValue(TASK_ID_OPTION.getOpt()))); + } else if (cl.hasOption(V2_OPTION.getOpt())) { + DirectoryLoadPType loadPType = DirectoryLoadPType.valueOf(cl.getOptionValue( + DIR_LOAD_TYPE_OPTION.getOpt(), 
"SINGLE_LISTING")); + loadMetadataV2(plainPath, cl.hasOption(RECURSIVE_OPTION.getOpt()), loadPType, + cl.hasOption(ASYNC_OPTION.getOpt()), + Integer.parseInt(cl.getOptionValue(POLLING_OPTION.getOpt(), "10000"))); + } else { + loadMetadata(plainPath, cl.hasOption(RECURSIVE_OPTION.getOpt()), + cl.hasOption(FORCE_OPTION.getOpt())); + } } @Override public int run(CommandLine cl) throws AlluxioException, IOException { String[] args = cl.getArgs(); - AlluxioURI path = new AlluxioURI(args[0]); - runWildCardCmd(path, cl); + AlluxioURI path; + if (args.length > 0) { + path = new AlluxioURI(args[0]); + runWildCardCmd(path, cl); + } else { + // -o cancel [task_id] / -o get [task_id] + runPlainPath(null, cl); + } return 0; } + private void printTask(SyncMetadataTask task) { + System.out.println("Task id: " + task.getId()); + if (task.getState() == SyncMetadataState.SUCCEEDED) { + System.out.println(Constants.ANSI_GREEN + "State: " + task.getState() + Constants.ANSI_RESET); + } else if (task.getState() == SyncMetadataState.FAILED) { + System.out.println(Constants.ANSI_RED + "State: " + task.getState() + Constants.ANSI_RESET); + } else { + System.out.println("State: " + task.getState()); + } + System.out.println("Sync duration: " + task.getSyncDurationMs()); + double opsSec = task.getSyncDurationMs() == 0 ? 
0 + : (double) task.getSuccessOpCount() / ((double) task.getSyncDurationMs() / (double) 1000); + System.out.println("Ops/sec: " + opsSec); + if (task.hasException()) { + System.out.println(Constants.ANSI_RED + "Exception: " + Constants.ANSI_RESET); + System.out.println(Constants.ANSI_RED + "\t" + task.getException().getExceptionType() + + Constants.ANSI_RESET); + System.out.println(Constants.ANSI_RED + "\t" + task.getException().getExceptionMessage() + + Constants.ANSI_RESET); + System.out.println(Constants.ANSI_RED + "\t" + task.getException().getStacktrace() + + Constants.ANSI_RESET); + } + System.out.println("Task info: "); + System.out.println("\t" + task.getTaskInfoString()); + System.out.println("Task stats: "); + System.out.println("\t" + task.getTaskStatString()); + if (task.getState() == SyncMetadataState.SUCCEEDED) { + System.out.println(Constants.ANSI_GREEN + "Load Metadata Completed." + Constants.ANSI_RESET); + } + if (task.getState() == SyncMetadataState.FAILED) { + System.out.println( + Constants.ANSI_RED + "Load Metadata Failed. Please check the server log or retry!" + + Constants.ANSI_RESET); + } + if (task.getState() == SyncMetadataState.CANCELED) { + System.out.println("Load Metadata Canceled."); + } + } + + private void getSyncProgress(long taskId) throws IOException, AlluxioException { + GetSyncProgressPResponse syncProgress = mFileSystem.getSyncProgress(taskId); + for (SyncMetadataTask task : syncProgress.getTaskList()) { + printTask(task); + } + } + + private void cancel(long taskGroupId) throws IOException, AlluxioException { + mFileSystem.cancelSyncMetadata(taskGroupId); + System.out.println("Task group " + taskGroupId + " cancelled"); + } + + private void loadMetadataV2( + AlluxioURI path, boolean recursive, DirectoryLoadPType dirLoadType, + boolean async, long pollingIntervalMs) throws IOException { + SyncMetadataPOptions options = + SyncMetadataPOptions.newBuilder().setLoadDescendantType(recursive + ? 
LoadDescendantPType.ALL : LoadDescendantPType.ONE) + .setDirectoryLoadType(dirLoadType).build(); + if (!async) { + try { + System.out.println("Starting metadata sync.."); + SyncMetadataPResponse response = mFileSystem.syncMetadata(path, options); + System.out.println("Sync Metadata finished"); + for (SyncMetadataTask task : response.getTaskList()) { + printTask(task); + } + return; + } catch (AlluxioException e) { + throw new IOException(e.getMessage()); + } + } + try { + System.out.println("Submitting metadata sync task..."); + SyncMetadataAsyncPResponse response = mFileSystem.syncMetadataAsync(path, options); + long taskGroupId = response.getTaskGroupId(); + System.out.println("Task group " + taskGroupId + " has been submitted successfully."); + System.out.println("Task ids: " + Arrays.toString(response.getTaskIdsList().toArray())); + System.out.println("Polling sync progress every " + pollingIntervalMs + "ms"); + System.out.println("You can also poll the sync progress in another terminal using:"); + System.out.println("\t$bin/alluxio fs loadMetadata -o get -id " + taskGroupId); + System.out.println("Sync is being executed asynchronously. Ctrl+C or closing the terminal " + + "does not stop the task group. 
To cancel the task, you can use: "); + System.out.println("\t$bin/alluxio fs loadMetadata -o cancel -id " + taskGroupId); + while (true) { + System.out.println("------------------------------------------------------"); + GetSyncProgressPResponse syncProgress = mFileSystem.getSyncProgress(taskGroupId); + List tasks = syncProgress.getTaskList().stream() + .sorted(Comparator.comparingLong(SyncMetadataTask::getId)).collect(Collectors.toList()); + boolean allComplete = true; + System.out.println("Task group id: " + taskGroupId); + for (SyncMetadataTask task : tasks) { + printTask(task); + if (task.getState() == SyncMetadataState.RUNNING) { + allComplete = false; + } + System.out.println(); + } + if (allComplete) { + return; + } + CommonUtils.sleepMs(pollingIntervalMs); + } + } catch (AlluxioException e) { + throw new IOException(e.getMessage()); + } + } + private void loadMetadata(AlluxioURI path, boolean recursive, boolean force) throws IOException { try { ListStatusPOptions options; @@ -98,7 +291,9 @@ private void loadMetadata(AlluxioURI path, boolean recursive, boolean force) thr } else { options = ListStatusPOptions.newBuilder().setRecursive(recursive).build(); } + long time = CommonUtils.getCurrentMs(); mFileSystem.loadMetadata(path, options); + System.out.println("Time elapsed " + (CommonUtils.getCurrentMs() - time)); } catch (AlluxioException e) { throw new IOException(e.getMessage()); } @@ -106,7 +301,9 @@ private void loadMetadata(AlluxioURI path, boolean recursive, boolean force) thr @Override public String getUsage() { - return "loadMetadata [-R] [-F] "; + return + "loadMetadata [-R] [-F] [-v2] [-a/--async] [-o/--operation ] " + + "[-d ] [-p ] "; } @Override @@ -116,6 +313,33 @@ public String getDescription() { @Override public void validateArgs(CommandLine cl) throws InvalidArgumentException { - CommandUtils.checkNumOfArgsNoLessThan(this, cl, 1); + String operation = cl.getOptionValue(OPERATION_OPTION.getOpt(), "load"); + if 
(!mOperationValues.contains(operation)) { + throw new InvalidArgumentException( + "Operation value " + operation + " invalid. Possible values: load/cancel/get"); + } + if (operation.equals("load")) { + CommandUtils.checkNumOfArgsNoLessThan(this, cl, 1); + } else { + CommandUtils.checkNumOfArgsNoMoreThan(this, cl, 0); + if (cl.hasOption(FORCE_OPTION.getOpt()) + || cl.hasOption(ASYNC_OPTION.getOpt()) + || cl.hasOption(DIR_LOAD_TYPE_OPTION.getOpt())) { + throw new InvalidArgumentException("-o load/cancel only supports -id option"); + } + if (!cl.hasOption(TASK_ID_OPTION.getOpt())) { + throw new InvalidArgumentException("-o load/cancel only comes with an -id option"); + } + } + if (cl.hasOption(FORCE_OPTION.getOpt()) && cl.hasOption(V2_OPTION.getOpt())) { + throw new InvalidArgumentException("LoadMetadata v2 does not support -F option."); + } + if (cl.hasOption(ASYNC_OPTION.getOpt()) && !cl.hasOption(V2_OPTION.getOpt())) { + throw new InvalidArgumentException("LoadMetadata v1 does not support -a/--async option."); + } + if (cl.hasOption(TASK_ID_OPTION.getOpt()) && operation.equals("load")) { + throw new InvalidArgumentException( + "-id option only works with get and cancel operation type"); + } } } diff --git a/tests/pom.xml b/tests/pom.xml index 5e9b20917958..b42c551c41ee 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -55,11 +55,6 @@ commons-io commons-io - - io.findify - s3mock_2.13 - test - org.hamcrest hamcrest @@ -100,6 +95,12 @@ org.apache.parquet parquet-avro + + com.google.inject + guice + 5.0.1 + test + @@ -228,6 +229,17 @@ ${project.version} test + + org.gaul + s3proxy + + + ch.qos.logback + logback-classic + + + test + diff --git a/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java index bb1af5ab85d9..bcc1704e9620 100644 --- a/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java +++ 
b/tests/src/test/java/alluxio/client/fs/FileSystemS3UfsIntegrationTest.java @@ -25,13 +25,14 @@ import alluxio.testutils.LocalAlluxioClusterResource; import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.S3Object; -import io.findify.s3mock.S3Mock; import org.apache.commons.io.IOUtils; +import org.gaul.s3proxy.junit.S3ProxyRule; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -44,8 +45,15 @@ public class FileSystemS3UfsIntegrationTest extends BaseIntegrationTest { private static final String TEST_CONTENT = "TestContents"; private static final String TEST_FILE = "test_file"; + private static final String TEST_FILE2 = "test_file2"; private static final int USER_QUOTA_UNIT_BYTES = 1000; + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + .withPort(8001) + .withCredentials("_", "_") + .build(); + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = new LocalAlluxioClusterResource.Builder() .setProperty(PropertyKey.USER_FILE_BUFFER_BYTES, USER_QUOTA_UNIT_BYTES) @@ -53,29 +61,28 @@ public class FileSystemS3UfsIntegrationTest extends BaseIntegrationTest { .setProperty(PropertyKey.UNDERFS_S3_ENDPOINT_REGION, "us-west-2") .setProperty(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS, true) .setProperty(PropertyKey.MASTER_MOUNT_TABLE_ROOT_UFS, "s3://" + TEST_BUCKET) - .setProperty(PropertyKey.S3A_ACCESS_KEY, "_") - .setProperty(PropertyKey.S3A_SECRET_KEY, "_") + .setProperty(PropertyKey.S3A_ACCESS_KEY, mS3Proxy.getAccessKey()) + .setProperty(PropertyKey.S3A_SECRET_KEY, mS3Proxy.getSecretKey()) .setStartCluster(false) .build(); private FileSystem mFileSystem = null; private AmazonS3 mS3Client = null; 
@Rule public ExpectedException mThrown = ExpectedException.none(); - private S3Mock mS3MockServer; + private static final String TEST_BUCKET = "test-bucket"; @Before public void before() throws Exception { - mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); - mS3MockServer.start(); - AwsClientBuilder.EndpointConfiguration - endpoint = new AwsClientBuilder.EndpointConfiguration( - "http://localhost:8001", "us-west-2"); mS3Client = AmazonS3ClientBuilder .standard() .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(endpoint) - .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + Regions.US_WEST_2.getName())) .build(); mS3Client.createBucket(TEST_BUCKET); @@ -86,13 +93,6 @@ public void before() throws Exception { @After public void after() { mS3Client = null; - try { - if (mS3MockServer != null) { - mS3MockServer.shutdown(); - } - } finally { - mS3MockServer = null; - } } @Test @@ -105,11 +105,11 @@ public void basicMetadataSync() throws IOException, AlluxioException { @Test public void basicWriteThrough() throws IOException, AlluxioException { FileOutStream fos = mFileSystem.createFile( - new AlluxioURI("/" + TEST_FILE), + new AlluxioURI("/" + TEST_FILE2), CreateFilePOptions.newBuilder().setWriteType(WritePType.CACHE_THROUGH).build()); fos.write(TEST_CONTENT.getBytes()); fos.close(); - try (S3Object s3Object = mS3Client.getObject(TEST_BUCKET, TEST_FILE)) { + try (S3Object s3Object = mS3Client.getObject(TEST_BUCKET, TEST_FILE2)) { assertEquals( TEST_CONTENT, IOUtils.toString(s3Object.getObjectContent(), StandardCharsets.UTF_8)); } diff --git a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java 
b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java index 366f56200e05..9f443a5cad55 100755 --- a/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java +++ b/tests/src/test/java/alluxio/testutils/underfs/delegating/DelegatingUnderFileSystem.java @@ -15,12 +15,14 @@ import alluxio.SyncInfo; import alluxio.collections.Pair; import alluxio.conf.AlluxioConfiguration; +import alluxio.file.options.DescendantType; import alluxio.security.authorization.AccessControlList; import alluxio.security.authorization.AclEntry; import alluxio.security.authorization.DefaultAccessControlList; import alluxio.underfs.Fingerprint; import alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsLoadResult; import alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; @@ -31,14 +33,17 @@ import alluxio.underfs.options.ListOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.underfs.options.OpenOptions; +import alluxio.util.RateLimiter; import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; /** * UFS which delegates to another UFS. Extend this class to override method behavior. 
@@ -255,6 +260,13 @@ public UfsStatus[] listStatus(String path, ListOptions options) throws IOExcepti return mUfs.listStatus(path, options); } + @javax.annotation.Nullable + @Override + public Iterator listStatusIterable( + String path, ListOptions options, String startAfter, int batchSize) throws IOException { + return mUfs.listStatusIterable(path, options, startAfter, batchSize); + } + @Override public boolean mkdirs(String path) throws IOException { return mUfs.mkdirs(path); @@ -359,4 +371,18 @@ public boolean startActiveSyncPolling(long txId) throws IOException { public boolean stopActiveSyncPolling() throws IOException { return mUfs.stopActiveSyncPolling(); } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, Consumer onComplete, + Consumer onError) { + mUfs.performListingAsync(path, continuationToken, + startAfter, descendantType, checkStatus, onComplete, onError); + } + + @Override + public RateLimiter getRateLimiter() { + return mUfs.getRateLimiter(); + } } diff --git a/underfs/local/pom.xml b/underfs/local/pom.xml index 9e7156c59693..4942869d68f1 100644 --- a/underfs/local/pom.xml +++ b/underfs/local/pom.xml @@ -35,6 +35,14 @@ ${project.version} provided + + + org.alluxio + alluxio-core-common + ${project.version} + test-jar + test + diff --git a/underfs/local/src/test/java/alluxio/underfs/local/LocalUnderFileSystemTest.java b/underfs/local/src/test/java/alluxio/underfs/local/LocalUnderFileSystemTest.java index 5e135cda15dd..3dd265c8bef1 100644 --- a/underfs/local/src/test/java/alluxio/underfs/local/LocalUnderFileSystemTest.java +++ b/underfs/local/src/test/java/alluxio/underfs/local/LocalUnderFileSystemTest.java @@ -22,12 +22,16 @@ import alluxio.conf.Configuration; import alluxio.conf.InstancedConfiguration; import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; import 
alluxio.underfs.UfsDirectoryStatus; import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsLoadResult; import alluxio.underfs.UfsMode; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.underfs.UnderFileSystemTestUtil; +import alluxio.underfs.options.CreateOptions; import alluxio.underfs.options.DeleteOptions; import alluxio.underfs.options.MkdirsOptions; import alluxio.util.io.PathUtils; @@ -298,6 +302,35 @@ public void testSymlinkNonSkip() throws IOException { assertThrows(NoSuchFileException.class, () -> mLocalUfs.listStatus(mLocalUfsRoot)); } + @Test + public void testListAsync() throws Throwable { + mLocalUfs.create(PathUtils.concatPath(mLocalUfsRoot, "root/d/f"), + CreateOptions.defaults(Configuration.global()).setCreateParent(true)).close(); + mLocalUfs.create(PathUtils.concatPath(mLocalUfsRoot, "root/d/d/f"), + CreateOptions.defaults(Configuration.global()).setCreateParent(true)).close(); + mLocalUfs.create(PathUtils.concatPath(mLocalUfsRoot, "root/f"), + CreateOptions.defaults(Configuration.global()).setCreateParent(true)).close(); + UfsLoadResult result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mLocalUfs, PathUtils.concatPath(mLocalUfsRoot, "root"), DescendantType.ALL); + Assert.assertEquals(5, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mLocalUfs, PathUtils.concatPath(mLocalUfsRoot, "root"), DescendantType.ONE); + Assert.assertEquals(2, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mLocalUfs, PathUtils.concatPath(mLocalUfsRoot, "root"), DescendantType.NONE); + Assert.assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mLocalUfs, PathUtils.concatPath(mLocalUfsRoot, "root/d/d/f"), DescendantType.NONE); + Assert.assertEquals(1, result.getItemsCount()); + + result = 
UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mLocalUfs, PathUtils.concatPath(mLocalUfsRoot, "root/foobar"), DescendantType.NONE); + Assert.assertEquals(0, result.getItemsCount()); + } + private Path createNonExistentSymlink() throws IOException { Path linkPath = Paths.get(mLocalUfsRoot, "test"); diff --git a/underfs/pom.xml b/underfs/pom.xml index a1c8e26f2995..2731610dcd22 100755 --- a/underfs/pom.xml +++ b/underfs/pom.xml @@ -79,8 +79,8 @@ - io.findify - s3mock_2.13 + org.gaul + s3proxy test diff --git a/underfs/s3a/pom.xml b/underfs/s3a/pom.xml index 088f0daf88e9..a103a2c31232 100644 --- a/underfs/s3a/pom.xml +++ b/underfs/s3a/pom.xml @@ -29,6 +29,19 @@ + + org.apache.commons + commons-collections4 + 4.0 + + + software.amazon.awssdk + s3 + + + software.amazon.awssdk + netty-nio-client + com.amazonaws aws-java-sdk-core diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java index 440c9762180b..b992272962e6 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java @@ -14,8 +14,13 @@ import alluxio.AlluxioURI; import alluxio.Constants; import alluxio.conf.PropertyKey; +import alluxio.file.options.DescendantType; import alluxio.retry.RetryPolicy; import alluxio.underfs.ObjectUnderFileSystem; +import alluxio.underfs.UfsDirectoryStatus; +import alluxio.underfs.UfsFileStatus; +import alluxio.underfs.UfsLoadResult; +import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.underfs.options.OpenOptions; @@ -62,19 +67,48 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.collections4.IteratorUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.client.config.ClientAsyncConfiguration; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.http.nio.netty.Http2Configuration; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.http.nio.netty.ProxyConfiguration; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.S3Object; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; +import java.time.Instant; import java.util.ArrayList; import java.util.Date; +import java.util.Iterator; import java.util.List; +import java.util.Optional; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; +import java.util.function.Consumer; import java.util.function.Supplier; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; @@ -99,6 +133,8 
@@ public class S3AUnderFileSystem extends ObjectUnderFileSystem { /** AWS-SDK S3 client. */ private final AmazonS3 mClient; + private final S3AsyncClient mAsyncClient; + /** Bucket name of user's configured Alluxio bucket. */ private final String mBucketName; @@ -212,6 +248,7 @@ public static S3AUnderFileSystem createInstance(AlluxioURI uri, AmazonS3 amazonS3Client = createAmazonS3(credentials, clientConf, endpointConfiguration, conf); + S3AsyncClient asyncClient = createAmazonS3Async(conf, clientConf); ExecutorService service = ExecutorServiceFactories .fixedThreadPool("alluxio-s3-transfer-manager-worker", @@ -223,10 +260,95 @@ public static S3AUnderFileSystem createInstance(AlluxioURI uri, .withMultipartCopyThreshold(MULTIPART_COPY_THRESHOLD) .build(); - return new S3AUnderFileSystem(uri, amazonS3Client, bucketName, + return new S3AUnderFileSystem(uri, amazonS3Client, asyncClient, bucketName, service, transferManager, conf, streamingUploadEnabled); } + /** + * Create an async S3 client. + * @param conf the conf + * @param clientConf the client conf + * @return the client + */ + public static S3AsyncClient createAmazonS3Async( + UnderFileSystemConfiguration conf, + ClientConfiguration clientConf) { + + S3AsyncClientBuilder clientBuilder = S3AsyncClient.builder(); + // need to check all the additional parameters for these + S3Configuration.builder(); + ClientOverrideConfiguration.builder(); + Http2Configuration.builder(); + ClientAsyncConfiguration.builder(); + + NettyNioAsyncHttpClient.Builder httpClientBuilder = NettyNioAsyncHttpClient.builder(); + AwsCredentialsProvider credentialsProvider; + // Set the aws credential system properties based on Alluxio properties, if they are set; + // otherwise, use the default credential provider. 
+ if (conf.isSet(PropertyKey.S3A_ACCESS_KEY) + && conf.isSet(PropertyKey.S3A_SECRET_KEY)) { + credentialsProvider = StaticCredentialsProvider.create(AwsBasicCredentials.create( + conf.getString(PropertyKey.S3A_ACCESS_KEY), conf.getString(PropertyKey.S3A_SECRET_KEY))); + } else { + // Checks, in order, env variables, system properties, profile file, and instance profile. + credentialsProvider = DefaultCredentialsProvider.builder().build(); + } + + if (conf.getBoolean(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)) { + LOG.warn("Path style is not supported by aws asy"); + } + + // Proxy host + if (conf.isSet(PropertyKey.UNDERFS_S3_PROXY_HOST)) { + ProxyConfiguration.Builder proxyBuilder = ProxyConfiguration.builder(); + proxyBuilder.host(conf.getString(PropertyKey.UNDERFS_S3_PROXY_HOST)); + // Proxy port + if (conf.isSet(PropertyKey.UNDERFS_S3_PROXY_PORT)) { + proxyBuilder.port(conf.getInt(PropertyKey.UNDERFS_S3_PROXY_PORT)); + } + httpClientBuilder.proxyConfiguration(proxyBuilder.build()); + } + boolean regionSet = false; + if (conf.isSet(PropertyKey.UNDERFS_S3_ENDPOINT)) { + String endpoint = conf.getString(PropertyKey.UNDERFS_S3_ENDPOINT); + final URI epr = RuntimeHttpUtils.toUri(endpoint, clientConf); + clientBuilder.endpointOverride(epr); + if (conf.isSet(PropertyKey.UNDERFS_S3_ENDPOINT_REGION)) { + regionSet = setRegionAsync(clientBuilder, + conf.getString(PropertyKey.UNDERFS_S3_ENDPOINT_REGION)); + } + } else if (conf.isSet(PropertyKey.UNDERFS_S3_REGION)) { + regionSet = setRegionAsync(clientBuilder, + conf.getString(PropertyKey.UNDERFS_S3_REGION)); + } + + if (!regionSet) { + String defaultRegion = Regions.US_EAST_1.getName(); + clientBuilder.region(Region.of(defaultRegion)); + LOG.warn("Cannot find S3 endpoint or s3 region in Alluxio configuration, " + + "set region to {} as default. 
S3 client v2 does not support global bucket access, " + + "considering specify the region in alluxio config.", + defaultRegion); + } + clientBuilder.httpClientBuilder(httpClientBuilder); + clientBuilder.credentialsProvider(credentialsProvider); + return clientBuilder.build(); + } + + private static boolean setRegionAsync( + S3AsyncClientBuilder builder, String region) { + try { + builder.region(Region.of(region)); + LOG.debug("Set S3 region {} to {}", PropertyKey.UNDERFS_S3_REGION.getName(), region); + return true; + } catch (SdkClientException e) { + LOG.error("S3 region {} cannot be recognized, " + + "fall back to use global bucket access with an extra HEAD request", + region, e); + return false; + } + } + /** * Create an AmazonS3 client. * @@ -327,11 +449,13 @@ private static AwsClientBuilder.EndpointConfiguration createEndpointConfiguratio * @param conf configuration for this S3A ufs * @param streamingUploadEnabled whether streaming upload is enabled */ - protected S3AUnderFileSystem(AlluxioURI uri, AmazonS3 amazonS3Client, String bucketName, + protected S3AUnderFileSystem( + AlluxioURI uri, AmazonS3 amazonS3Client, S3AsyncClient asyncClient, String bucketName, ExecutorService executor, TransferManager transferManager, UnderFileSystemConfiguration conf, boolean streamingUploadEnabled) { super(uri, conf); mClient = amazonS3Client; + mAsyncClient = asyncClient; mBucketName = bucketName; mExecutor = MoreExecutors.listeningDecorator(executor); mManager = transferManager; @@ -457,6 +581,14 @@ protected String getFolderSuffix() { @Nullable protected ObjectListingChunk getObjectListingChunk(String key, boolean recursive) throws IOException { + return getObjectListingChunk(key, recursive, null, 0); + } + + @Nullable + @Override + protected ObjectListingChunk getObjectListingChunk( + String key, boolean recursive, @Nullable String startAfter, int batchSize) + throws IOException { String delimiter = recursive ? 
"" : PATH_SEPARATOR; key = PathUtils.normalizePath(key, PATH_SEPARATOR); // In case key is root (empty string) do not normalize prefix. @@ -474,6 +606,12 @@ protected ObjectListingChunk getObjectListingChunk(String key, boolean recursive ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(mBucketName).withPrefix(key) .withDelimiter(delimiter).withMaxKeys(getListingChunkLength(mUfsConf)); + if (startAfter != null) { + request.setStartAfter(startAfter); + } + if (batchSize > 0) { + request.setMaxKeys(batchSize); + } ListObjectsV2Result result = getObjectListingChunk(request); if (result != null) { return new S3AObjectListingChunk(request, result); @@ -510,6 +648,221 @@ private ObjectListing getObjectListingChunkV1(ListObjectsRequest request) { return result; } + void performGetStatusAsync( + String path, Consumer onComplete, + Consumer onError) { + String folderSuffix = getFolderSuffix(); + path = stripPrefixIfPresent(path); + path = path.equals(folderSuffix) ? "" : path; + if (path.isEmpty()) { + onComplete.accept(null); + return; + } + HeadObjectRequest request = + HeadObjectRequest.builder().bucket(mBucketName).key(path).build(); + String finalPath = path; + mAsyncClient.headObject(request).whenCompleteAsync((result, err) -> { + if (err != null) { + if (err.getCause() instanceof NoSuchKeyException) { + onComplete.accept(null); + } else { + onError.accept(parseS3AsyncException(err)); + } + } else { + try { + ObjectPermissions permissions = getPermissions(); + long bytes = mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT); + Instant lastModifiedDate = result.lastModified(); + Long lastModifiedTime = lastModifiedDate == null ? 
null + : lastModifiedDate.toEpochMilli(); + UfsStatus status; + if (finalPath.endsWith(folderSuffix)) { + status = new UfsDirectoryStatus(finalPath, permissions.getOwner(), + permissions.getGroup(), permissions.getMode()); + } else { + status = new UfsFileStatus(finalPath, + result.eTag().substring(1, result.eTag().length() - 1), + result.contentLength(), lastModifiedTime, permissions.getOwner(), + permissions.getGroup(), permissions.getMode(), bytes); + } + onComplete.accept(status); + } catch (Throwable t) { + onError.accept(t); + } + } + }); + } + + @Override + public void performListingAsync( + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, boolean checkStatus, + Consumer onComplete, Consumer onError) { + if (checkStatus) { + Preconditions.checkState(continuationToken == null); + performGetStatusAsync(path, status -> { + if (status != null && (status.isFile() || descendantType == DescendantType.NONE)) { + onComplete.accept(new UfsLoadResult(Stream.of(status), 1, null, + null, false, status.isFile(), true)); + } else { + finishListingAsync(status, path, null, startAfter, + descendantType, onComplete, onError); + } + }, onError); + } else { + finishListingAsync(null, path, continuationToken, startAfter, + descendantType, onComplete, onError); + } + } + + private Throwable parseS3AsyncException(Throwable e) { + if (e instanceof CompletionException) { + final Throwable innerErr = e.getCause(); + if (innerErr instanceof S3Exception) { + S3Exception innerS3Err = (S3Exception) innerErr; + if (innerS3Err.statusCode() == 307 + || (innerS3Err.awsErrorDetails().errorCode().equals("AuthorizationHeaderMalformed") + && innerS3Err.getMessage().contains("region"))) { + return new IOException( + "AWS s3 v2 client does not support global region. 
" + + "Please either specify the region using alluxio.underfs.s3.region " + + "or in your s3 endpoint alluxio.underfs.s3.endpoint.", innerS3Err); + } + } + return new IOException(e.getCause()); + } + return e; + } + + private void finishListingAsync(@Nullable UfsStatus baseStatus, + String path, @Nullable String continuationToken, @Nullable String startAfter, + DescendantType descendantType, + Consumer onComplete, Consumer onError) { + // if descendant type is NONE then we only want to return the directory itself + int maxKeys = descendantType == DescendantType.NONE ? 1 : getListingChunkLength(mUfsConf); + path = stripPrefixIfPresent(path); + String delimiter = descendantType == DescendantType.ALL ? "" : PATH_SEPARATOR; + path = PathUtils.normalizePath(path, PATH_SEPARATOR); + // In case key is root (empty string) do not normalize prefix. + path = path.equals(PATH_SEPARATOR) ? "" : path; + String s3StartAfter = null; + if (path.equals("")) { + s3StartAfter = startAfter; + } else if (startAfter != null) { + s3StartAfter = PathUtils.concatPath(path, startAfter); + } + software.amazon.awssdk.services.s3.model.ListObjectsV2Request.Builder request = + software.amazon.awssdk.services.s3.model.ListObjectsV2Request + .builder().bucket(mBucketName).prefix(path).continuationToken(continuationToken) + .startAfter(startAfter == null ? null : s3StartAfter) + .delimiter(delimiter).maxKeys(maxKeys); + String finalPath = path; + mAsyncClient.listObjectsV2(request.build()) + .whenCompleteAsync((result, err) -> { + if (err != null) { + onError.accept(parseS3AsyncException(err)); + } else { + try { + AlluxioURI lastItem = null; + String lastPrefix = result.commonPrefixes().size() == 0 ? null + : result.commonPrefixes().get(result.commonPrefixes().size() - 1).prefix(); + String lastResult = result.contents().size() == 0 ? 
null + : result.contents().get(result.contents().size() - 1).key(); + if (lastPrefix == null && lastResult != null) { + lastItem = new AlluxioURI(lastResult); + } else if (lastPrefix != null && lastResult == null) { + lastItem = new AlluxioURI(lastPrefix); + } else if (lastPrefix != null) { // both are non-null + lastItem = new AlluxioURI(lastPrefix.compareTo(lastResult) > 0 + ? lastPrefix : lastResult); + } + int keyCount = result.keyCount(); + Stream resultStream = resultToStream(baseStatus, result); + if (descendantType == DescendantType.NONE) { + Preconditions.checkState(baseStatus == null); + // if descendant type is NONE then we only want to return the directory itself + Optional> str = resultStream.findFirst().map(item -> { + if (item.isDirectory() && item.getName().equals(finalPath)) { + return Stream.of(item); + } else { + if (item.getName().startsWith(finalPath)) { + // in this case we received a file nested under the path, this can happen + // if there was no marker object for the directory, and it contained + // a nested object + ObjectPermissions permissions = getPermissions(); + return Stream.of(new UfsDirectoryStatus(finalPath, + permissions.getOwner(), permissions.getGroup(), permissions.getMode())); + } + } + return Stream.empty(); + }); + resultStream = str.orElse(Stream.empty()); + } + onComplete.accept( + new UfsLoadResult(resultStream, + keyCount, + result.nextContinuationToken(), lastItem, + descendantType != DescendantType.NONE && result.isTruncated(), + false, true)); + } catch (Throwable t) { + onError.accept(t); + } + } + }); + } + + private UfsStatus s3ObjToUfsStatus( + S3Object obj, String folderSuffix, ObjectPermissions permissions, long bytes) { + if (obj.key().endsWith(folderSuffix)) { + return new UfsDirectoryStatus(obj.key(), permissions.getOwner(), + permissions.getGroup(), permissions.getMode()); + } else { + Instant lastModifiedDate = obj.lastModified(); + Long lastModifiedTime = lastModifiedDate == null ? 
null + : lastModifiedDate.toEpochMilli(); + return new UfsFileStatus(obj.key(), + obj.eTag().substring(1, obj.eTag().length() - 1), obj.size(), lastModifiedTime, + permissions.getOwner(), permissions.getGroup(), permissions.getMode(), bytes); + } + } + + private UfsStatus prefixToUfsStatus(CommonPrefix prefix, ObjectPermissions permissions) { + return new UfsDirectoryStatus( + prefix.prefix(), permissions.getOwner(), permissions.getGroup(), + permissions.getMode()); + } + + private Stream resultToStream( + @Nullable UfsStatus baseStatus, ListObjectsV2Response response) { + // Directories are either keys that end with / + // Or common prefixes which will also end with / + // All results contain the full path from the bucket root + ObjectPermissions permissions = getPermissions(); + String folderSuffix = getFolderSuffix(); + long bytes = mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT); + Iterator prefixes = response.commonPrefixes().stream().map( + prefix -> prefixToUfsStatus(prefix, permissions)).iterator(); + Stream itemStream = response.contents().stream().map(obj -> + s3ObjToUfsStatus(obj, folderSuffix, permissions, bytes)); + if (baseStatus != null) { + itemStream = Stream.concat(Stream.of(baseStatus), itemStream); + } + Iterator items = itemStream.iterator(); + return StreamSupport.stream(Spliterators.spliteratorUnknownSize( + IteratorUtils.collatedIterator((s1, s2) -> { + int val = s1.getName().compareTo(s2.getName()); + if (val != 0) { + return val; + } + // If they have the same name, then return the directory first + if (s1.isDirectory() && s2.isDirectory()) { + return 0; + } + return s1.isDirectory() ? -1 : 1; + }, prefixes, items), + Spliterator.ORDERED), false); + } + /** * Wrapper over S3 {@link ListObjectsV2Request}. 
*/ @@ -554,6 +907,11 @@ public ObjectListingChunk getNextChunk() throws IOException { } return null; } + + @Override + public Boolean hasNextChunk() { + return mResult.isTruncated(); + } } /** diff --git a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java index 6df76561cb23..37be5c746275 100644 --- a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemMockServerTest.java @@ -11,38 +11,52 @@ package alluxio.underfs.s3a; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import alluxio.AlluxioURI; import alluxio.conf.Configuration; import alluxio.conf.InstancedConfiguration; +import alluxio.file.options.DescendantType; +import alluxio.underfs.UfsLoadResult; import alluxio.underfs.UfsStatus; import alluxio.underfs.UnderFileSystemConfiguration; +import alluxio.underfs.UnderFileSystemTestUtil; import alluxio.underfs.options.ListOptions; import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.transfer.TransferManager; -import io.findify.s3mock.S3Mock; +import com.google.common.collect.Iterators; import org.apache.commons.io.IOUtils; +import org.gaul.s3proxy.junit.S3ProxyRule; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import 
software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; import java.util.concurrent.Executors; /** - * Unit tests for the {@link S3AUnderFileSystem} using an s3 mock server. + * Unit tests for the {@link S3AUnderFileSystem} using a s3 mock server. */ public class S3AUnderFileSystemMockServerTest { private static final InstancedConfiguration CONF = Configuration.copyGlobal(); @@ -55,28 +69,41 @@ public class S3AUnderFileSystemMockServerTest { private S3AUnderFileSystem mS3UnderFileSystem; private AmazonS3 mClient; - private S3Mock mS3MockServer; + @Rule + public S3ProxyRule mS3Proxy = S3ProxyRule.builder() + // This is a must to close the behavior gap between native s3 and s3 proxy + .withBlobStoreProvider("transient") + .withPort(8001) + .withCredentials("_", "_") + .build(); @Rule public final ExpectedException mThrown = ExpectedException.none(); @Before public void before() throws AmazonClientException { - mS3MockServer = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); - mS3MockServer.start(); - AwsClientBuilder.EndpointConfiguration endpoint = new AwsClientBuilder.EndpointConfiguration( "http://localhost:8001", "us-west-2"); mClient = AmazonS3ClientBuilder .standard() .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(endpoint) - .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(mS3Proxy.getUri().toString(), + 
Regions.US_WEST_2.getName())) .build(); + S3AsyncClient asyncClient = + S3AsyncClient.builder().credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(mS3Proxy.getAccessKey(), mS3Proxy.getSecretKey()))) + .endpointOverride(mS3Proxy.getUri()).region(Region.US_WEST_2).build(); mClient.createBucket(TEST_BUCKET); + mS3UnderFileSystem = - new S3AUnderFileSystem(new AlluxioURI("s3://" + TEST_BUCKET), mClient, TEST_BUCKET, + new S3AUnderFileSystem(new AlluxioURI("s3://" + TEST_BUCKET), mClient, + asyncClient, TEST_BUCKET, Executors.newSingleThreadExecutor(), new TransferManager(), UnderFileSystemConfiguration.defaults(CONF), false); } @@ -84,13 +111,6 @@ public void before() throws AmazonClientException { @After public void after() { mClient = null; - try { - if (mS3MockServer != null) { - mS3MockServer.shutdown(); - } - } finally { - mS3MockServer = null; - } } @Test @@ -103,18 +123,17 @@ public void read() throws IOException { } @Test - public void listRecursive() throws IOException { + public void nestedDirectory() throws Throwable { mClient.putObject(TEST_BUCKET, "d1/d1/f1", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "d1/d1/f2", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "d1/d2/f1", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "d2/d1/f1", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "d3/", ""); + mClient.putObject(TEST_BUCKET, "d4/", ""); + mClient.putObject(TEST_BUCKET, "d4/f1", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "f1", TEST_CONTENT); mClient.putObject(TEST_BUCKET, "f2", TEST_CONTENT); - UfsStatus[] ufsStatuses = mS3UnderFileSystem.listStatus( - "/", ListOptions.defaults().setRecursive(true)); - /* Objects: d1/ @@ -127,10 +146,85 @@ public void listRecursive() throws IOException { d2/d1/ d2/d1/f1 d3/ + d4/ + d4/f1 f1 f2 */ + + UfsStatus[] ufsStatuses = mS3UnderFileSystem.listStatus( + "/", ListOptions.defaults().setRecursive(true)); assertNotNull(ufsStatuses); - assertEquals(12, ufsStatuses.length); + assertEquals(14, 
ufsStatuses.length); + + UfsLoadResult result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "/", DescendantType.ALL); + Assert.assertEquals(9, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "/", DescendantType.ONE); + Assert.assertEquals(6, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d1", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d1/", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d3", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d3/", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d4", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "d4/", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "f1", DescendantType.NONE); + assertEquals(1, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "f1/", DescendantType.NONE); + assertEquals(0, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "f3", DescendantType.NONE); + assertEquals(0, result.getItemsCount()); + + result = UnderFileSystemTestUtil.performListingAsyncAndGetResult( + mS3UnderFileSystem, "f3/", DescendantType.NONE); + assertEquals(0, 
result.getItemsCount()); + } + + @Test + public void iterator() throws IOException { + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 5; ++k) { + mClient.putObject(TEST_BUCKET, String.format("%d/%d/%d", i, j, k), TEST_CONTENT); + } + } + } + + Iterator ufsStatusesIterator = mS3UnderFileSystem.listStatusIterable( + "/", ListOptions.defaults().setRecursive(true), null, 5); + UfsStatus[] statusesFromListing = + mS3UnderFileSystem.listStatus("/", ListOptions.defaults().setRecursive(true)); + assertNotNull(statusesFromListing); + assertNotNull(ufsStatusesIterator); + UfsStatus[] statusesFromIterator = + Iterators.toArray(ufsStatusesIterator, UfsStatus.class); + Arrays.sort(statusesFromListing, Comparator.comparing(UfsStatus::getName)); + assertArrayEquals(statusesFromIterator, statusesFromListing); } } diff --git a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemTest.java b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemTest.java index d0412f0d8dc9..dbc7ab4bc663 100644 --- a/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemTest.java +++ b/underfs/s3a/src/test/java/alluxio/underfs/s3a/S3AUnderFileSystemTest.java @@ -40,6 +40,7 @@ import org.junit.rules.ExpectedException; import org.mockito.ArgumentMatchers; import org.mockito.Mockito; +import software.amazon.awssdk.services.s3.S3AsyncClient; import java.io.Closeable; import java.io.IOException; @@ -62,6 +63,7 @@ public class S3AUnderFileSystemTest { private S3AUnderFileSystem mS3UnderFileSystem; private AmazonS3Client mClient; + private S3AsyncClient mAsyncClient; private ListeningExecutorService mExecutor; private TransferManager mManager; @@ -73,8 +75,10 @@ public void before() throws AmazonClientException { mClient = Mockito.mock(AmazonS3Client.class); mExecutor = Mockito.mock(ListeningExecutorService.class); mManager = Mockito.mock(TransferManager.class); + mAsyncClient = Mockito.mock(S3AsyncClient.class); mS3UnderFileSystem = - new 
S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), mClient, BUCKET_NAME, + new S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), + mClient, mAsyncClient, BUCKET_NAME, mExecutor, mManager, UnderFileSystemConfiguration.defaults(CONF), false); } @@ -183,8 +187,9 @@ public void getPermissionsWithMapping() throws Exception { conf.put(PropertyKey.UNDERFS_S3_OWNER_ID_TO_USERNAME_MAPPING, "111=altname"); try (Closeable c = new ConfigurationRule(conf, CONF).toResource()) { S3AUnderFileSystem s3UnderFileSystem = - new S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), mClient, BUCKET_NAME, - mExecutor, mManager, UnderFileSystemConfiguration.defaults(CONF), false); + new S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), mClient, + mAsyncClient, BUCKET_NAME, + mExecutor, mManager, UnderFileSystemConfiguration.defaults(CONF), false); Mockito.when(mClient.getS3AccountOwner()).thenReturn(new Owner("111", "test")); Mockito.when(mClient.getBucketAcl(Mockito.anyString())).thenReturn(new AccessControlList()); @@ -202,8 +207,9 @@ public void getPermissionsNoMapping() throws Exception { conf.put(PropertyKey.UNDERFS_S3_OWNER_ID_TO_USERNAME_MAPPING, "111=userid"); try (Closeable c = new ConfigurationRule(conf, CONF).toResource()) { S3AUnderFileSystem s3UnderFileSystem = - new S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), mClient, BUCKET_NAME, - mExecutor, mManager, UnderFileSystemConfiguration.defaults(CONF), false); + new S3AUnderFileSystem(new AlluxioURI("s3a://" + BUCKET_NAME), + mClient, mAsyncClient, BUCKET_NAME, + mExecutor, mManager, UnderFileSystemConfiguration.defaults(CONF), false); Mockito.when(mClient.getS3AccountOwner()).thenReturn(new Owner("0", "test")); Mockito.when(mClient.getBucketAcl(Mockito.anyString())).thenReturn(new AccessControlList()); From 0e03c6551698d9152d65f6d7ad3827c5373cd46c Mon Sep 17 00:00:00 2001 From: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> Date: Tue, 9 May 2023 01:00:38 -0700 Subject: 
[PATCH 260/334] Fix startup master health check A workaround to fix the master health check issue, related to #17382 which is caused by a recent change that starts grpc servers on standby masters. Follower masters have a gRPC server always active. When the `MasterHealthCheckClient` connects to the gRPC server, it may find that some services aren't registered (NotFound) rather than unavailable. This manifests in a bug where `bin/alluxio-start.sh master` performs a health check after starting a master and incorrectly reports the master as unhealthy. pr-link: Alluxio/alluxio#17383 change-id: cid-38f5babf01a3597099a821ec3be61f325630c098 --- .../src/main/java/alluxio/common/RpcPortHealthCheckClient.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shell/src/main/java/alluxio/common/RpcPortHealthCheckClient.java b/shell/src/main/java/alluxio/common/RpcPortHealthCheckClient.java index b661cabf1242..107e22202075 100644 --- a/shell/src/main/java/alluxio/common/RpcPortHealthCheckClient.java +++ b/shell/src/main/java/alluxio/common/RpcPortHealthCheckClient.java @@ -14,6 +14,7 @@ import alluxio.HealthCheckClient; import alluxio.conf.AlluxioConfiguration; import alluxio.exception.status.AlluxioStatusException; +import alluxio.exception.status.NotFoundException; import alluxio.exception.status.UnavailableException; import alluxio.grpc.ServiceType; import alluxio.retry.RetryPolicy; @@ -66,7 +67,7 @@ public boolean isServing() { NetworkAddressUtils.pingService(mNodeAddress, mServiceType, mConf, mUserState); LOG.debug("Successfully connected to {}", mNodeAddress); return true; - } catch (UnavailableException e) { + } catch (UnavailableException | NotFoundException e) { LOG.debug("Failed to connect to {} on attempt #{}", mNodeAddress, retry.getAttemptCount()); } catch (AlluxioStatusException e) { From b7804f7c801636bbdc80acdadbf20874c9869de8 Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 10 May 2023 01:52:33 +0800 Subject: [PATCH 261/334] Remove s3 handler 
map in s3 proxy ### What changes are proposed in this pull request? Remove s3 handler map in s3 proxy. ### Why are the changes needed? https://github.com/Alluxio/alluxio/issues/17403 ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs NO 2. addition or removal of property keys NO 3. webui NO pr-link: Alluxio/alluxio#17406 change-id: cid-ddf598601ab15f9d9ecb5a22901c191b54935d85 --- .../src/main/java/alluxio/proxy/s3/S3RequestServlet.java | 6 +----- .../proxy/src/main/java/alluxio/web/ProxyWebServer.java | 7 ++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java index 40c192a0b26b..747ffc4874cc 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RequestServlet.java @@ -18,12 +18,10 @@ import alluxio.util.ThreadUtils; import alluxio.web.ProxyWebServer; -import org.eclipse.jetty.server.Request; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import javax.servlet.AsyncContext; import javax.servlet.ServletException; @@ -76,9 +74,7 @@ public void service(HttpServletRequest request, S3Handler.processResponse(response, errorResponse); return; } - ((ConcurrentHashMap) getServletContext() - .getAttribute(ProxyWebServer.PROXY_S3_HANDLER_MAP)) - .put((Request) request, s3Handler); + request.setAttribute(ProxyWebServer.S3_HANDLER_ATTRIBUTE, s3Handler); // Handle request async if (Configuration.getBoolean(PropertyKey.PROXY_S3_V2_ASYNC_PROCESSING_ENABLED)) { S3BaseTask.OpTag opTag = s3Handler.getS3Task().mOPType.getOpTag(); diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java 
b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index 8f82a949eccc..d43c6e24f914 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -44,7 +44,6 @@ import java.net.InetSocketAddress; import java.util.Collections; import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -72,13 +71,12 @@ public final class ProxyWebServer extends WebServer { private final RateLimiter mGlobalRateLimiter; private final FileSystem mFileSystem; private AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; - public static final String PROXY_S3_HANDLER_MAP = "Proxy S3 Handler Map"; - public ConcurrentHashMap mS3HandlerMap = new ConcurrentHashMap<>(); + public static final String S3_HANDLER_ATTRIBUTE = "Proxy S3 Handler Attribute"; class ProxyListener implements HttpChannel.Listener { public void onComplete(Request request) { - S3Handler s3Hdlr = mS3HandlerMap.get(request); + S3Handler s3Hdlr = (S3Handler) request.getAttribute(S3_HANDLER_ATTRIBUTE); if (s3Hdlr != null) { ProxyWebServer.logAccess(s3Hdlr.getServletRequest(), s3Hdlr.getServletResponse(), s3Hdlr.getStopwatch(), s3Hdlr.getS3Task() != null @@ -168,7 +166,6 @@ public void init() throws ServletException { mAsyncAuditLogWriter); getServletContext().setAttribute(PROXY_S3_V2_LIGHT_POOL, createLightThreadPool()); getServletContext().setAttribute(PROXY_S3_V2_HEAVY_POOL, createHeavyThreadPool()); - getServletContext().setAttribute(PROXY_S3_HANDLER_MAP, mS3HandlerMap); } }); mServletContextHandler From 4238c5f342d24d58e21b42c06b05f1f1b38d6987 Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Wed, 10 May 2023 19:33:12 +0800 Subject: [PATCH 262/334] Fix verbose config deprecation warning ### What changes are proposed in this 
pull request? Don't print a warning in log if the deprecated config is not set by the user explicitly. ### Why are the changes needed? Even if a deprecated config is not set, when a client or a worker loads cluster default config from master, the config is considered to be `CLUSTER_DEFUALT` level, and triggers a deprecation warning. This PR changes the condition for a deprecation message to be logged to be that the level is higher than `CLUSTER_DEFAULT`, which is the same condition used by `isSetByUser`. After this change, if a deprecated config does get set in `alluxio-site.properties`, the config source level would be `SITE_PROPERTIES` and get logged at the master's log. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#17333 change-id: cid-584c3b66bbb701e7fc70d6e27d91dbd3072851f7 --- .../src/main/java/alluxio/conf/InstancedConfiguration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java index bed376293f4e..64f1db7c465e 100644 --- a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java +++ b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java @@ -377,7 +377,7 @@ public void validate() { + "If no JVM property is present, Alluxio will use default value '%s'.", key.getName(), key.getDefaultValue()); - if (PropertyKey.isDeprecated(key) && getSource(key).compareTo(Source.DEFAULT) != 0) { + if (PropertyKey.isDeprecated(key) && isSetByUser(key)) { LOG.warn("{} is deprecated. Please avoid using this key in the future. {}", key.getName(), PropertyKey.getDeprecationMessage(key)); } From ea7ac65e96b40a5287a3caf900197e998a99f40e Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Thu, 11 May 2023 23:34:35 +0800 Subject: [PATCH 263/334] Fix s3 force path style ### What changes are proposed in this pull request? 
Fix the path style ### Why are the changes needed? Path style was not respected for aws s3 async client previously ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#17420 change-id: cid-859d1f5bd69a13e56b5030c1dc175dec81752bc9 --- .../main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java index b992272962e6..953d4369ff99 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java @@ -295,7 +295,10 @@ public static S3AsyncClient createAmazonS3Async( } if (conf.getBoolean(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)) { - LOG.warn("Path style is not supported by aws asy"); + S3Configuration config = S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build(); + clientBuilder.serviceConfiguration(config); } // Proxy host From b7bf1784bccda8ccd9fb483a8721d259094d49f6 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 12 May 2023 10:36:56 +0800 Subject: [PATCH 264/334] Support display config hash and timestamp in web page ### What changes are proposed in this pull request? Display the config hash and config updated time image image ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#16642 change-id: cid-075d4f9ea1cee81f23706781dcf5cc5ca155e416 --- .../java/alluxio/conf/AlluxioProperties.java | 7 ++ .../main/java/alluxio/conf/Configuration.java | 7 ++ .../src/main/java/alluxio/conf/Hash.java | 9 +++ .../alluxio/conf/InstancedConfiguration.java | 7 ++ .../main/java/alluxio/wire/ConfigHash.java | 44 ++++++++++- .../main/java/alluxio/wire/Configuration.java | 50 +++++++++++- .../wire/MasterWebUIConfiguration.java | 18 +++++ .../wire/WorkerWebUIConfiguration.java | 76 +++++++++++++++++++ .../meta/AlluxioMasterRestServiceHandler.java | 11 +-- .../master/meta/DefaultMetaMaster.java | 5 +- .../alluxio/master/meta/PathProperties.java | 9 ++- .../master/meta/PathPropertiesView.java | 13 +++- .../AlluxioWorkerRestServiceHandler.java | 24 ++++-- .../worker/block/DefaultBlockWorker.java | 2 +- .../src/main/proto/grpc/meta_master.proto | 2 + .../fsmaster/BenchStandaloneGrpcServer.java | 2 +- 16 files changed, 263 insertions(+), 23 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/AlluxioProperties.java b/core/common/src/main/java/alluxio/conf/AlluxioProperties.java index 389e25d22c99..41c22edb75f5 100644 --- a/core/common/src/main/java/alluxio/conf/AlluxioProperties.java +++ b/core/common/src/main/java/alluxio/conf/AlluxioProperties.java @@ -273,4 +273,11 @@ public Source getSource(PropertyKey key) { public String hash() { return mHash.get(); } + + /** + * @return the last update time of the properties + */ + public long getLastUpdateTime() { + return mHash.getLastUpdateTime(); + } } diff --git a/core/common/src/main/java/alluxio/conf/Configuration.java b/core/common/src/main/java/alluxio/conf/Configuration.java index 09d250f2a222..1561025eeb4d 100644 --- a/core/common/src/main/java/alluxio/conf/Configuration.java +++ b/core/common/src/main/java/alluxio/conf/Configuration.java @@ -641,4 +641,11 @@ private static Optional loadProperties(InputStream stream) { } return Optional.of(properties); } + + /** + * @return 
the last update time + */ + public static long getLastUpdateTime() { + return SERVER_CONFIG_REFERENCE.get().getLastUpdateTime(); + } } diff --git a/core/common/src/main/java/alluxio/conf/Hash.java b/core/common/src/main/java/alluxio/conf/Hash.java index ddfe6f04a91c..498a0782c318 100644 --- a/core/common/src/main/java/alluxio/conf/Hash.java +++ b/core/common/src/main/java/alluxio/conf/Hash.java @@ -29,6 +29,7 @@ public final class Hash { private final Supplier> mProperties; private final AtomicBoolean mShouldUpdate; private volatile String mVersion; + private volatile long mLastUpdateTime; /** * @param properties a stream of encoded properties @@ -70,10 +71,18 @@ public String get() { // If another thread has recomputed the version, no need to recompute again. if (mShouldUpdate.get()) { mVersion = compute(); + mLastUpdateTime = System.currentTimeMillis(); mShouldUpdate.set(false); } } } return mVersion; } + + /** + * @return the latest update time + */ + public synchronized long getLastUpdateTime() { + return mLastUpdateTime; + } } diff --git a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java index 64f1db7c465e..a56075ec7a31 100644 --- a/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java +++ b/core/common/src/main/java/alluxio/conf/InstancedConfiguration.java @@ -694,6 +694,13 @@ && getInt(PropertyKey.MASTER_THROTTLE_STRESSED_RPC_QUEUE_SIZE) } } + /** + * @return the last update time + */ + public long getLastUpdateTime() { + return mProperties.getLastUpdateTime(); + } + private class UnresolvablePropertyException extends Exception { public UnresolvablePropertyException(String msg) { diff --git a/core/common/src/main/java/alluxio/wire/ConfigHash.java b/core/common/src/main/java/alluxio/wire/ConfigHash.java index b41a60e09c46..cf4b4aafab2d 100644 --- a/core/common/src/main/java/alluxio/wire/ConfigHash.java +++ 
b/core/common/src/main/java/alluxio/wire/ConfigHash.java @@ -11,7 +11,9 @@ package alluxio.wire; +import alluxio.conf.PropertyKey; import alluxio.grpc.GetConfigHashPResponse; +import alluxio.util.CommonUtils; import com.google.common.base.Objects; import com.google.common.base.Preconditions; @@ -25,23 +27,29 @@ public class ConfigHash { private final String mClusterConfigHash; private final String mPathConfigHash; + private long mClusterConfigLastUpdateTime; + private long mPathConfigLastUpdateTime; /** * Constructs a new ConfigHash. * * @param clusterConfigHash cluster configuration hash, cannot be null * @param pathConfigHash path configuration hash, cannot be null + * @param clusterConfigLastUpdateTime the cluster config last update time + * @param pathConfigLastUpdateTime path config last update time */ - public ConfigHash(String clusterConfigHash, String pathConfigHash) { + public ConfigHash(String clusterConfigHash, String pathConfigHash, + long clusterConfigLastUpdateTime, long pathConfigLastUpdateTime) { Preconditions.checkNotNull(clusterConfigHash, "clusterConfigHash"); Preconditions.checkNotNull(pathConfigHash, "pathConfigHash"); mClusterConfigHash = clusterConfigHash; mPathConfigHash = pathConfigHash; + mClusterConfigLastUpdateTime = clusterConfigLastUpdateTime; + mPathConfigLastUpdateTime = pathConfigLastUpdateTime; } private ConfigHash(GetConfigHashPResponse response) { - mClusterConfigHash = response.getClusterConfigHash(); - mPathConfigHash = response.getPathConfigHash(); + this(response.getClusterConfigHash(), response.getPathConfigHash(), 0, 0); } /** @@ -80,6 +88,36 @@ public String getPathConfigHash() { return mPathConfigHash; } + /** + * @return cluster config last update time + */ + public long getClusterConfigLastUpdateTime() { + return mClusterConfigLastUpdateTime; + } + + /** + * @return path config last update time + */ + public long getPathConfigLastUpdateTime() { + return mPathConfigLastUpdateTime; + } + + /** + * @return cluster config 
last update time text + */ + public String getClusterConfigLastUpdateTimeText() { + return CommonUtils.convertMsToDate(mClusterConfigLastUpdateTime, + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); + } + + /** + * @return path config last update time text + */ + public String getPathConfigLastUpdateTimeText() { + return CommonUtils.convertMsToDate(mPathConfigLastUpdateTime, + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN)); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/core/common/src/main/java/alluxio/wire/Configuration.java b/core/common/src/main/java/alluxio/wire/Configuration.java index 566af58b2b3a..be3f790e987f 100644 --- a/core/common/src/main/java/alluxio/wire/Configuration.java +++ b/core/common/src/main/java/alluxio/wire/Configuration.java @@ -40,6 +40,10 @@ public final class Configuration { private final String mClusterConfHash; /** Path configuration hash. */ private final String mPathConfHash; + /** Cluster configuration last update time. */ + private final long mClusterConfLastUpdateTime; + /** Path configuration last update time. */ + private final long mPathConfLastUpdateTime; /** * @return new configuration builder @@ -57,6 +61,8 @@ public static final class Builder { private Map> mPathConf = new HashMap<>(); private String mClusterConfHash; private String mPathConfHash; + private long mClusterConfLastUpdateTime; + private long mPathConfLastUpdateTime; /** * Adds a cluster level property. @@ -101,20 +107,42 @@ public void setPathConfHash(String hash) { mPathConfHash = hash; } + /** + * Sets cluster config last update time. + * + * @param lastUpdateTime the last update time + */ + public void setClusterConfLastUpdateTime(long lastUpdateTime) { + mClusterConfLastUpdateTime = lastUpdateTime; + } + + /** + * Sets path config last update time. 
+ * + * @param lastUpdateTime the last update time + */ + public void setPathConfLastUpdateTime(long lastUpdateTime) { + mPathConfLastUpdateTime = lastUpdateTime; + } + /** * @return a newly constructed configuration */ public Configuration build() { - return new Configuration(mClusterConf, mPathConf, mClusterConfHash, mPathConfHash); + return new Configuration(mClusterConf, mPathConf, mClusterConfHash, mPathConfHash, + mClusterConfLastUpdateTime, mPathConfLastUpdateTime); } } private Configuration(List clusterConf, Map> pathConf, - String clusterConfHash, String pathConfHash) { + String clusterConfHash, String pathConfHash, + long clusterConfLastUpdateTime, long pathConfLastUpdateTime) { mClusterConf = clusterConf; mPathConf = pathConf; mClusterConfHash = clusterConfHash; mPathConfHash = pathConfHash; + mClusterConfLastUpdateTime = clusterConfLastUpdateTime; + mPathConfLastUpdateTime = pathConfLastUpdateTime; } private Configuration(GetConfigurationPResponse conf) { @@ -131,6 +159,8 @@ private Configuration(GetConfigurationPResponse conf) { mClusterConfHash = conf.getClusterConfigHash(); mPathConfHash = conf.getPathConfigHash(); + mClusterConfLastUpdateTime = conf.getClusterConfigLastUpdateTime(); + mPathConfLastUpdateTime = conf.getPathConfigLastUpdateTime(); } /** @@ -164,6 +194,8 @@ public GetConfigurationPResponse toProto() { if (mPathConfHash != null) { response.setPathConfigHash(mPathConfHash); } + response.setClusterConfigLastUpdateTime(mClusterConfLastUpdateTime); + response.setPathConfigLastUpdateTime(mPathConfLastUpdateTime); return response.build(); } @@ -194,4 +226,18 @@ public String getClusterConfHash() { public String getPathConfHash() { return mPathConfHash; } + + /** + * @return cluster conf last update time + */ + public long getClusterConfLastUpdateTime() { + return mClusterConfLastUpdateTime; + } + + /** + * @return path conf last update time + */ + public long getPathConfLastUpdateTime() { + return mPathConfLastUpdateTime; + } } diff --git 
a/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java b/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java index 2f91532d0dbd..ce12ff38800b 100644 --- a/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java +++ b/core/common/src/main/java/alluxio/wire/MasterWebUIConfiguration.java @@ -28,6 +28,7 @@ public final class MasterWebUIConfiguration implements Serializable { private List mWhitelist; private TreeSet> mConfiguration; + private ConfigHash mConfigHash; /** * Creates a new instance of {@link MasterWebUIConfiguration}. @@ -76,6 +77,23 @@ public MasterWebUIConfiguration setWhitelist(List whitelist) { return this; } + /** + * @return cluster config hash + */ + public ConfigHash getConfigHash() { + return mConfigHash; + } + + /** + * Sets config hash. + * @param configHash the config hash + * @return the configuration + */ + public MasterWebUIConfiguration setConfigHash(ConfigHash configHash) { + mConfigHash = configHash; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("configuration", mConfiguration) diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java b/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java index d1d2435c229b..8411e98354da 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUIConfiguration.java @@ -28,6 +28,10 @@ public final class WorkerWebUIConfiguration implements Serializable { private List mWhitelist; private TreeSet> mConfiguration; + private String mClusterConfigHash; + private String mPathConfigHash; + private String mClusterConfigLastUpdateTime; + private String mPathConfigLastUpdateTime; /** * Creates a new instance of {@link WorkerWebUIConfiguration}. 
@@ -76,6 +80,78 @@ public WorkerWebUIConfiguration setWhitelist(List whitelist) { return this; } + /** + * @return cluster config hash + */ + public String getClusterConfigHash() { + return mClusterConfigHash; + } + + /** + * Sets cluster config hash. + * @param clusterConfigHash the cluster config hash + * @return the configuration + */ + public WorkerWebUIConfiguration setClusterConfigHash(String clusterConfigHash) { + mClusterConfigHash = clusterConfigHash; + return this; + } + + /** + * @return path config hash + */ + public String getPathConfigHash() { + return mPathConfigHash; + } + + /** + * Sets path config hash. + * + * @param pathConfigHash the path config hash + * @return the configuration + */ + public WorkerWebUIConfiguration setPathConfigHash(String pathConfigHash) { + mPathConfigHash = pathConfigHash; + return this; + } + + /** + * @return cluster config last update time + */ + public String getClusterConfigLastUpdateTime() { + return mClusterConfigLastUpdateTime; + } + + /** + * Sets cluster config last update time. + * + * @param clusterConfigLastUpdateTime the cluster config last update time + * @return the configuration + */ + public WorkerWebUIConfiguration setClusterConfigLastUpdateTime( + String clusterConfigLastUpdateTime) { + mClusterConfigLastUpdateTime = clusterConfigLastUpdateTime; + return this; + } + + /** + * @return path config last update time + */ + public String getPathConfigLastUpdateTime() { + return mPathConfigLastUpdateTime; + } + + /** + * Sets the path config last update time. 
+ * @param pathConfigLastUpdateTime path config last update time + * @return the configuration + */ + public WorkerWebUIConfiguration setPathConfigLastUpdateTime( + String pathConfigLastUpdateTime) { + mPathConfigLastUpdateTime = pathConfigLastUpdateTime; + return this; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("configuration", mConfiguration) diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index 3898a6ef4bd0..1a74ded14dce 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -63,6 +63,7 @@ import alluxio.wire.BlockLocation; import alluxio.wire.Capacity; import alluxio.wire.ConfigCheckReport; +import alluxio.wire.ConfigHash; import alluxio.wire.FileBlockInfo; import alluxio.wire.FileInfo; import alluxio.wire.MasterInfo; @@ -816,12 +817,11 @@ public Response getWebUIConfiguration() { MasterWebUIConfiguration response = new MasterWebUIConfiguration(); response.setWhitelist(mFileSystemMaster.getWhiteList()); - + alluxio.wire.Configuration conf = mMetaMaster.getConfiguration( + GetConfigurationPOptions.newBuilder().setRawValue(true).build()); TreeSet> sortedProperties = new TreeSet<>(); Set alluxioConfExcludes = Sets.newHashSet(PropertyKey.MASTER_WHITELIST.toString()); - for (ConfigProperty configProperty : mMetaMaster - .getConfiguration(GetConfigurationPOptions.newBuilder().setRawValue(true).build()) - .toProto().getClusterConfigsList()) { + for (ConfigProperty configProperty : conf.toProto().getClusterConfigsList()) { String confName = configProperty.getName(); if (!alluxioConfExcludes.contains(confName)) { sortedProperties.add(new ImmutableTriple<>(confName, @@ -831,7 +831,8 @@ public Response getWebUIConfiguration() { } 
response.setConfiguration(sortedProperties); - + response.setConfigHash(new ConfigHash(conf.getClusterConfHash(), conf.getPathConfHash(), + conf.getClusterConfLastUpdateTime(), conf.getPathConfLastUpdateTime())); return response; }, Configuration.global()); } diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 42822132b27c..49c30b276f4b 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -463,6 +463,7 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti // NOTE(cc): assumes that Configuration is read-only when master is running, otherwise, // the following hash might not correspond to the above cluster configuration. builder.setClusterConfHash(Configuration.hash()); + builder.setClusterConfLastUpdateTime(Configuration.getLastUpdateTime()); } if (!options.getIgnorePathConf()) { @@ -471,6 +472,7 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti properties.forEach((key, value) -> builder.addPathProperty(path, key, value))); builder.setPathConfHash(pathProperties.getHash()); + builder.setPathConfLastUpdateTime(pathProperties.getLastUpdateTime()); } return builder.build(); @@ -478,7 +480,8 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti @Override public ConfigHash getConfigHash() { - return new ConfigHash(Configuration.hash(), mPathProperties.hash()); + return new ConfigHash(Configuration.hash(), mPathProperties.hash(), + Configuration.getLastUpdateTime(), mPathProperties.getLastUpdateTime()); } @Override diff --git a/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java b/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java index 717871cc691d..a38a743d1a87 100644 --- 
a/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java +++ b/core/server/master/src/main/java/alluxio/master/meta/PathProperties.java @@ -66,7 +66,7 @@ public final class PathProperties implements DelegatingJournaled { */ public PathPropertiesView snapshot() { try (LockResource r = new LockResource(mLock.readLock())) { - return new PathPropertiesView(get(), hash()); + return new PathPropertiesView(get(), hash(), mHash.getLastUpdateTime()); } } @@ -154,6 +154,13 @@ public Journaled getDelegate() { return mState; } + /** + * @return the last update time of the properties + */ + public long getLastUpdateTime() { + return mHash.getLastUpdateTime(); + } + /** * Journaled state of path level properties. */ diff --git a/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java b/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java index 5eb1cdbb1fd2..2fc4831c8c6e 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java +++ b/core/server/master/src/main/java/alluxio/master/meta/PathPropertiesView.java @@ -19,16 +19,20 @@ public final class PathPropertiesView { private final Map> mProperties; private final String mHash; + private final long mLastUpdateTime; /** * Constructs a read-only view of path level properties. 
* * @param properties map from path to properties * @param hash hash of all path level properties + * @param lastUpdateTime last update time */ - public PathPropertiesView(Map> properties, String hash) { + public PathPropertiesView(Map> properties, String hash, + long lastUpdateTime) { mProperties = properties; mHash = hash; + mLastUpdateTime = lastUpdateTime; } /** @@ -44,4 +48,11 @@ public Map> getProperties() { public String getHash() { return mHash; } + + /** + * @return last update time + */ + public long getLastUpdateTime() { + return mLastUpdateTime; + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java index f5dc57f26d47..405314eeb84e 100644 --- a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java @@ -30,6 +30,7 @@ import alluxio.master.block.BlockId; import alluxio.metrics.MetricKey; import alluxio.metrics.MetricsSystem; +import alluxio.util.CommonUtils; import alluxio.util.ConfigurationUtils; import alluxio.util.FormatUtils; import alluxio.util.LogUtils; @@ -590,23 +591,30 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques public Response getWebUIConfiguration() { return RestUtils.call(() -> { WorkerWebUIConfiguration response = new WorkerWebUIConfiguration(); - response.setWhitelist(mBlockWorker.getWhiteList()); + response.setWhitelist(mBlockWorker.getWhiteList()); + alluxio.wire.Configuration conf = mBlockWorker.getConfiguration( + GetConfigurationPOptions.newBuilder().setRawValue(true).build()); TreeSet> sortedProperties = new TreeSet<>(); - Set alluxioConfExcludes = Sets.newHashSet(PropertyKey.WORKER_WHITELIST.toString()); - for (ConfigProperty configProperty : mBlockWorker - .getConfiguration(GetConfigurationPOptions.newBuilder().setRawValue(true).build()) - 
.toProto().getClusterConfigsList()) { + Set alluxioConfExcludes = Sets.newHashSet(PropertyKey.MASTER_WHITELIST.toString()); + for (ConfigProperty configProperty : conf.toProto().getClusterConfigsList()) { String confName = configProperty.getName(); if (!alluxioConfExcludes.contains(confName)) { sortedProperties.add(new ImmutableTriple<>(confName, - ConfigurationUtils.valueAsString(configProperty.getValue()), - configProperty.getSource())); + ConfigurationUtils.valueAsString(configProperty.getValue()), + configProperty.getSource())); } } response.setConfiguration(sortedProperties); - + response.setClusterConfigHash(conf.getClusterConfHash()); + response.setPathConfigHash(conf.getPathConfHash()); + response.setClusterConfigLastUpdateTime( + CommonUtils.convertMsToDate(conf.getClusterConfLastUpdateTime(), + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN))); + response.setPathConfigLastUpdateTime( + CommonUtils.convertMsToDate(conf.getPathConfLastUpdateTime(), + alluxio.conf.Configuration.getString(PropertyKey.USER_DATE_FORMAT_PATTERN))); return response; }, Configuration.global()); } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 23de3f7fd05f..7741351cb0b3 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -500,8 +500,8 @@ public alluxio.wire.Configuration getConfiguration(GetConfigurationPOptions opti // NOTE(cc): assumes that Configuration is read-only when master is running, otherwise, // the following hash might not correspond to the above cluster configuration. 
builder.setClusterConfHash(Configuration.hash()); + builder.setClusterConfLastUpdateTime(Configuration.getLastUpdateTime()); } - return builder.build(); } diff --git a/core/transport/src/main/proto/grpc/meta_master.proto b/core/transport/src/main/proto/grpc/meta_master.proto index 5a408c68714a..f896b34f6d5e 100644 --- a/core/transport/src/main/proto/grpc/meta_master.proto +++ b/core/transport/src/main/proto/grpc/meta_master.proto @@ -22,6 +22,8 @@ message GetConfigurationPResponse{ map pathConfigs = 2; optional string clusterConfigHash = 3; optional string pathConfigHash = 4; + optional int64 clusterConfigLastUpdateTime = 5; + optional int64 pathConfigLastUpdateTime = 6; } enum ConfigStatus { diff --git a/microbench/src/main/java/alluxio/fsmaster/BenchStandaloneGrpcServer.java b/microbench/src/main/java/alluxio/fsmaster/BenchStandaloneGrpcServer.java index cbf50a657120..a864d948d6e9 100644 --- a/microbench/src/main/java/alluxio/fsmaster/BenchStandaloneGrpcServer.java +++ b/microbench/src/main/java/alluxio/fsmaster/BenchStandaloneGrpcServer.java @@ -139,7 +139,7 @@ public void getConfiguration(GetConfigurationPOptions request, } private final GetConfigHashPResponse mGetConfigHashResponse = - new ConfigHash(Configuration.hash(), new PathProperties().hash()).toProto(); + new ConfigHash(Configuration.hash(), new PathProperties().hash(), 0, 0).toProto(); @Override public void getConfigHash(GetConfigHashPOptions request, From d21a37bedf6caa8237a4de69dc9a2471dfd12c4a Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 12 May 2023 10:37:45 +0800 Subject: [PATCH 265/334] Support print xattr for stat command ### What changes are proposed in this pull request? Support print xattr for stat command. ### Why are the changes needed? Needs to show the xattr for user. ### Does this PR introduce any user facing changes? User can see the xattr leverage this PR. 
pr-link: Alluxio/alluxio#17395 change-id: cid-edcb428ee08c8d460c61ae5ce9d555cc6453f02e --- core/common/src/main/java/alluxio/wire/FileInfo.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/common/src/main/java/alluxio/wire/FileInfo.java b/core/common/src/main/java/alluxio/wire/FileInfo.java index 3181f177ada5..9a613181a01f 100644 --- a/core/common/src/main/java/alluxio/wire/FileInfo.java +++ b/core/common/src/main/java/alluxio/wire/FileInfo.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nullable; import javax.annotation.concurrent.NotThreadSafe; @@ -700,6 +701,10 @@ public String toString() { .add("ufsFingerprint", mUfsFingerprint) .add("acl", mAcl.toString()) .add("defaultAcl", mDefaultAcl.toString()) + .add("xattr", "[" + (mXAttr == null ? null : mXAttr.entrySet().stream() + .map(entry -> entry.getKey() + ":" + + (entry.getValue() == null ? null : new String(entry.getValue()))) + .collect(Collectors.joining(", "))) + "]") .toString(); } } From b6adbf04e0be265f83785f98b0d7aaead5868eb2 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 12 May 2023 10:39:08 +0800 Subject: [PATCH 266/334] Support config obs configuration from mount option ### What changes are proposed in this pull request? Support config obs configuration from mount option ### Why are the changes needed? For now, we cannot specify other obs client configurations through any way. ### Does this PR introduce any user facing changes? After this PR, we can config any obs configuration. 
pr-link: Alluxio/alluxio#17191 change-id: cid-a7f9ac3929d6deceb9ee71a02a2dc5f6c44bb489 --- .../underfs/obs/OBSUnderFileSystem.java | 3 +- .../alluxio/underfs/obs/ObsClientExt.java | 43 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 underfs/obs/src/main/java/alluxio/underfs/obs/ObsClientExt.java diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java index b402c9bb8134..59f9c25afe29 100644 --- a/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/OBSUnderFileSystem.java @@ -103,7 +103,8 @@ public static OBSUnderFileSystem createInstance(AlluxioURI uri, String endPoint = conf.getString(PropertyKey.OBS_ENDPOINT); String bucketType = conf.getString(PropertyKey.OBS_BUCKET_TYPE); - ObsClient obsClient = new ObsClient(accessKey, secretKey, endPoint); + ObsClient obsClient = new ObsClientExt(accessKey, secretKey, endPoint, + conf.getMountSpecificConf()); String bucketName = UnderFileSystemUtils.getBucketName(uri); return new OBSUnderFileSystem(uri, obsClient, bucketName, bucketType, conf); } diff --git a/underfs/obs/src/main/java/alluxio/underfs/obs/ObsClientExt.java b/underfs/obs/src/main/java/alluxio/underfs/obs/ObsClientExt.java new file mode 100644 index 000000000000..5649249b3f6b --- /dev/null +++ b/underfs/obs/src/main/java/alluxio/underfs/obs/ObsClientExt.java @@ -0,0 +1,43 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. 
+ * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.obs; + +import com.obs.services.ObsClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +/** + * An extension of ObsClient to support config. + */ +public class ObsClientExt extends ObsClient { + private static final Logger LOG = LoggerFactory.getLogger(ObsClientExt.class); + + /** + * Construct obs client. + * + * @param accessKey ak in the access key secretKey + * @param secretKey sk in the access key endPoint + * @param endPoint OBS endpoint + * @param conf the map of OBS configuration + */ + public ObsClientExt(String accessKey, String secretKey, String endPoint, + Map conf) { + super(accessKey, secretKey, endPoint); + for (Map.Entry entry : conf.entrySet()) { + obsProperties.setProperty(entry.getKey(), + entry.getValue() == null ? null : entry.getValue().toString()); + LOG.debug("Set obs client conf: {}={}", entry.getKey(), entry.getValue()); + } + } +} From f027447f181fa1d92c2e0dd3a03b55568cf8eee9 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Fri, 12 May 2023 11:25:59 +0800 Subject: [PATCH 267/334] Make the client poll the master concurrently ### What changes are proposed in this pull request? Make the client poll the master concurrently. ### Why are the changes needed? Currently, we find the active master node by polling each node in series. If a node did not respond due to network or other issues, this may take a lot of time and affect service availability. Make the client poll the master concurrently can alleviate this problem. ### Does this PR introduce any user facing changes? 
pr-link: Alluxio/alluxio#17186 change-id: cid-d1ef20b596deda02375324978adb23e6f8ce8dce --- .../main/java/alluxio/conf/PropertyKey.java | 9 ++ .../master/PollingMasterInquireClient.java | 98 +++++++++++++++---- .../java/alluxio/network/RejectingServer.java | 16 +++ .../alluxio/conf/ConfigurationBuilder.java | 2 +- .../PollingMasterInquireClientTest.java | 46 ++++++++- 5 files changed, 149 insertions(+), 22 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index fb392f860a06..d5c6a5e58fa7 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -6331,6 +6331,13 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_MASTER_POLLING_CONCURRENT = + booleanBuilder(Name.USER_MASTER_POLLING_CONCURRENT) + .setDefaultValue(false) + .setDescription("Whether to concurrently polling the master.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_METADATA_CACHE_ENABLED = booleanBuilder(Name.USER_METADATA_CACHE_ENABLED) .setDefaultValue(false) @@ -8936,6 +8943,8 @@ public static final class Name { "alluxio.user.local.writer.chunk.size.bytes"; public static final String USER_LOGGING_THRESHOLD = "alluxio.user.logging.threshold"; public static final String USER_MASTER_POLLING_TIMEOUT = "alluxio.user.master.polling.timeout"; + public static final String USER_MASTER_POLLING_CONCURRENT = + "alluxio.user.master.polling.concurrent"; public static final String USER_METADATA_CACHE_ENABLED = "alluxio.user.metadata.cache.enabled"; public static final String USER_METADATA_CACHE_MAX_SIZE = diff --git a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java 
index a1939fb98e11..849a154ad5af 100644 --- a/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java +++ b/core/common/src/main/java/alluxio/master/PollingMasterInquireClient.java @@ -33,14 +33,21 @@ import alluxio.uri.MultiMasterAuthority; import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import io.grpc.StatusRuntimeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import javax.annotation.Nullable; @@ -52,6 +59,13 @@ */ public class PollingMasterInquireClient implements MasterInquireClient { private static final Logger LOG = LoggerFactory.getLogger(PollingMasterInquireClient.class); + private static final ExecutorService EXECUTOR_SERVICE = + Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("pollingMasterThread-%d") + .build() + ); private final MultiMasterConnectDetails mConnectDetails; private final Supplier mRetryPolicySupplier; @@ -129,31 +143,81 @@ private InetSocketAddress getAddress() { addresses = mConnectDetails.getAddresses(); } + if (mConfiguration.getBoolean(PropertyKey.USER_MASTER_POLLING_CONCURRENT)) { + return findActiveAddressConcurrent(addresses); + } else { + return findActiveAddress(addresses); + } + } + + @Nullable + private InetSocketAddress findActiveAddressConcurrent(List addresses) { + List> futures = new ArrayList<>(addresses.size()); + try { + ExecutorCompletionService completionService = + new ExecutorCompletionService<>(EXECUTOR_SERVICE); + for (InetSocketAddress 
address : addresses) { + futures.add(completionService.submit(() -> checkActiveAddress(address))); + } + for (int i = 0; i < addresses.size(); i++) { + try { + Future future = completionService.take(); + InetSocketAddress address = future.get(); + if (address != null) { + return address; + } + } catch (InterruptedException | ExecutionException e) { + break; + } + } + return null; + } finally { + futures.forEach(it -> it.cancel(true)); + } + } + + @Nullable + private InetSocketAddress findActiveAddress(List addresses) { for (InetSocketAddress address : addresses) { try { - LOG.debug("Checking whether {} is listening for RPCs", address); - pingMetaService(address); - LOG.debug("Successfully connected to {}", address); - return address; - } catch (UnavailableException e) { - LOG.debug("Failed to connect to {}", address); - } catch (DeadlineExceededException e) { - LOG.debug("Timeout while connecting to {}", address); - } catch (CancelledException e) { - LOG.debug("Cancelled while connecting to {}", address); - } catch (NotFoundException e) { - // If the gRPC server is enabled but the metadata service isn't enabled, - // try the next master address. - LOG.debug("Meta service rpc endpoint not found on {}. {}", address, e); - } catch (AlluxioStatusException e) { - LOG.error("Error while connecting to {}. {}", address, e); - // Breaking the loop on non filtered error. 
+ if (checkActiveAddress(address) != null) { + return address; + } + } catch (AlluxioStatusException e) { break; } } return null; } + private InetSocketAddress checkActiveAddress(InetSocketAddress address) + throws AlluxioStatusException { + try { + LOG.debug("Checking whether {} is listening for RPCs", address); + pingMetaService(address); + LOG.debug("Successfully connected to {}", address); + return address; + } catch (UnavailableException e) { + LOG.debug("Failed to connect to {}", address); + return null; + } catch (DeadlineExceededException e) { + LOG.debug("Timeout while connecting to {}", address); + return null; + } catch (CancelledException e) { + LOG.debug("Cancelled while connecting to {}", address); + return null; + } catch (NotFoundException e) { + // If the gRPC server is enabled but the metadata service isn't enabled, + // try the next master address. + LOG.debug("Meta service rpc endpoint not found on {}. {}", address, e); + return null; + } catch (AlluxioStatusException e) { + LOG.error("Error while connecting to {}. {}", address, e); + // Breaking the loop on non filtered error. 
+ throw e; + } + } + private void pingMetaService(InetSocketAddress address) throws AlluxioStatusException { // disable authentication in the channel since version service does not require authentication GrpcChannel channel = diff --git a/core/common/src/main/java/alluxio/network/RejectingServer.java b/core/common/src/main/java/alluxio/network/RejectingServer.java index 23b39ccbd943..8d922e9cf7d3 100644 --- a/core/common/src/main/java/alluxio/network/RejectingServer.java +++ b/core/common/src/main/java/alluxio/network/RejectingServer.java @@ -12,7 +12,9 @@ package alluxio.network; import alluxio.Constants; +import alluxio.util.CommonUtils; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,13 +33,24 @@ public final class RejectingServer extends Thread { private final InetSocketAddress mAddress; private ServerSocket mServerSocket; + private final long mSleepTime; /** * @param address the socket address to reject requests on */ public RejectingServer(InetSocketAddress address) { + this(address, 0); + } + + /** + * @param address the socket address to reject requests on + * @param sleepTime sleep time before close connection + */ + @VisibleForTesting + public RejectingServer(InetSocketAddress address, long sleepTime) { super("RejectingServer-" + address); mAddress = address; + mSleepTime = sleepTime; } @Override @@ -52,6 +65,9 @@ public void run() { while (!Thread.interrupted()) { try { Socket s = mServerSocket.accept(); + if (mSleepTime > 0) { + CommonUtils.sleepMs(mSleepTime); + } s.close(); } catch (SocketException e) { return; diff --git a/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java b/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java index f306139fbd4e..87bf6cd90f55 100644 --- a/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java +++ b/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java @@ -24,7 +24,7 @@ public class 
ConfigurationBuilder { * @return the updated configuration builder */ public ConfigurationBuilder setProperty(PropertyKey key, Object value) { - mProperties.put(key, value.toString(), Source.RUNTIME); + mProperties.put(key, value, Source.RUNTIME); return this; } diff --git a/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java b/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java index a5b1c088d3ca..27f9e9be568b 100644 --- a/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java +++ b/core/common/src/test/java/alluxio/master/PollingMasterInquireClientTest.java @@ -11,12 +11,20 @@ package alluxio.master; -import static org.junit.Assert.fail; +import static org.junit.Assert.assertThrows; import alluxio.Constants; +import alluxio.conf.AlluxioProperties; import alluxio.conf.ConfigurationBuilder; +import alluxio.conf.InstancedConfiguration; +import alluxio.conf.PropertyKey; import alluxio.exception.status.UnavailableException; +import alluxio.grpc.GrpcServer; +import alluxio.grpc.GrpcServerAddress; +import alluxio.grpc.GrpcServerBuilder; +import alluxio.grpc.GrpcService; import alluxio.grpc.ServiceType; +import alluxio.grpc.ServiceVersionClientServiceGrpc; import alluxio.network.RejectingServer; import alluxio.retry.CountingRetry; import alluxio.util.network.NetworkAddressUtils; @@ -46,11 +54,41 @@ public void pollRejectingDoesntHang() throws Exception { PollingMasterInquireClient client = new PollingMasterInquireClient(addrs, () -> new CountingRetry(0), new ConfigurationBuilder().build(), ServiceType.META_MASTER_CLIENT_SERVICE); + assertThrows("Expected polling to fail", UnavailableException.class, + client::getPrimaryRpcAddress); + } + + @Test(timeout = 10000) + public void concurrentPollingMaster() throws Exception { + int port1 = PortRegistry.reservePort(); + int port2 = PortRegistry.reservePort(); + InetSocketAddress serverAddress1 = new InetSocketAddress("127.0.0.1", port1); + InetSocketAddress 
serverAddress2 = new InetSocketAddress("127.0.0.1", port2); + RejectingServer s1 = new RejectingServer(serverAddress1, 20000); + GrpcServer s2 = + GrpcServerBuilder.forAddress(GrpcServerAddress.create(serverAddress2), + new InstancedConfiguration(new AlluxioProperties())) + .addService(ServiceType.META_MASTER_CLIENT_SERVICE, new GrpcService( + new ServiceVersionClientServiceGrpc.ServiceVersionClientServiceImplBase() { + })).build(); try { + s1.start(); + s2.start(); + List addrs = + Arrays.asList(InetSocketAddress.createUnresolved("127.0.0.1", port1), + InetSocketAddress.createUnresolved("127.0.0.1", port2)); + PollingMasterInquireClient client = new PollingMasterInquireClient(addrs, + () -> new CountingRetry(0), + new ConfigurationBuilder() + .setProperty(PropertyKey.USER_MASTER_POLLING_CONCURRENT, true) + .build(), + ServiceType.META_MASTER_CLIENT_SERVICE); client.getPrimaryRpcAddress(); - fail("Expected polling to fail"); - } catch (UnavailableException e) { - // Expected + } finally { + s1.stopAndJoin(); + s2.shutdown(); + PortRegistry.release(port1); + PortRegistry.release(port2); } } } From 69be372c8f12e8be878d099eacc900bd11a76a9f Mon Sep 17 00:00:00 2001 From: voddle Date: Tue, 16 May 2023 21:28:25 +0800 Subject: [PATCH 268/334] Cache block worker metrics ### What changes are proposed in this pull request? This PR change replace the `registerGaugeIfAbsent` in `DefaultBlockWorker:Metrics:registerGauges` with `registerCachedGaugeIfAbsent`, and made a container `BlockWorkerMetrics` for `BlockStoreMeta` of `DefaultBlockWorker` ### Why are the changes needed? Previously the MetricSystem will copy a full BlcokStoreMeta everytime it's trying to update the metric, which cost is expensive and can be avoid. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17357 change-id: cid-0e05812a75e51e9b74dbb802b82abb8c56d19b27 --- .../worker/block/BlockWorkerMetrics.java | 131 ++++++++++++++++++ .../worker/block/DefaultBlockWorker.java | 42 +++--- .../worker/block/BlockWorkerMetricsTest.java | 8 +- 3 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java diff --git a/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java b/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java new file mode 100644 index 000000000000..e106c37b7e42 --- /dev/null +++ b/core/common/src/main/java/alluxio/worker/block/BlockWorkerMetrics.java @@ -0,0 +1,131 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block; + +import alluxio.StorageTierAssoc; + +import java.util.HashMap; +import java.util.Map; + +/** + * The BlockMetaMetricCache cache the metric data of the BlockMeta from the BlockWorker. + * + * So the BlockWorker can pass this MetricCache to registerGauge instead of let registerGauge + * copy a whole BlockMeta everytime updating the metrics. + */ +public class BlockWorkerMetrics { + private final long mCapacityBytes; + private final long mUsedBytes; + private final long mCapacityFree; + + private final Map mCapacityBytesOnTiers; + private final Map mUsedBytesOnTiers; + private final Map mFreeBytesOnTiers; + private final int mNumberOfBlocks; + + /** + * construct func of BlockWorkerMetrics. 
+ * @param capacityBytes + * @param usedBytes + * @param capacityFree + * @param capacityBytesOnTiers + * @param usedBytesOnTiers + * @param freeBytesOnTiers + * @param numberOfBlocks + */ + public BlockWorkerMetrics(long capacityBytes, long usedBytes, long capacityFree, + Map capacityBytesOnTiers, + Map usedBytesOnTiers, + Map freeBytesOnTiers, int numberOfBlocks) { + mCapacityBytes = capacityBytes; + mUsedBytes = usedBytes; + mCapacityFree = capacityFree; + mCapacityBytesOnTiers = capacityBytesOnTiers; + mUsedBytesOnTiers = usedBytesOnTiers; + mFreeBytesOnTiers = freeBytesOnTiers; + mNumberOfBlocks = numberOfBlocks; + } + + /** + * @return the capacityBytes + */ + public long getCapacityBytes() { + return mCapacityBytes; + } + + /** + * @return the usedBytes + */ + public long getUsedBytes() { + return mUsedBytes; + } + + /** + * @return the freeCapacityBytes + */ + public long getCapacityFree() { + return mCapacityFree; + } + + /** + * @return the tierCapacityBytes map + */ + public Map getCapacityBytesOnTiers() { + return mCapacityBytesOnTiers; + } + + /** + * @return the tierUsedBytes map + */ + public Map getUsedBytesOnTiers() { + return mUsedBytesOnTiers; + } + + /** + * @return the tierFreeBytes map + */ + public Map getFreeBytesOnTiers() { + return mFreeBytesOnTiers; + } + + /** + * @return the numberOfBlocks + */ + public int getNumberOfBlocks() { + return mNumberOfBlocks; + } + + /** + * return a new BlockWorkerMetrics form a new BlockStoreMeta. 
+ * @param meta new BlockStoreMeta + * @param s the StorageTierAssoc, can't import here so pass it as param + * @return a new BlockWorkerMetrics + */ + public static BlockWorkerMetrics from(BlockStoreMeta meta, StorageTierAssoc s) { + long capacityBytes = meta.getCapacityBytes(); + long usedBytes = meta.getUsedBytes(); + long capacityFree = capacityBytes - usedBytes; + Map capacityBytesOnTiers = meta.getCapacityBytesOnTiers(); + Map usedBytesOnTiers = meta.getUsedBytesOnTiers(); + // freeBytesOnTiers is recalculated + Map freeBytesOnTiers = new HashMap<>(); + for (int i = 0; i < s.size(); i++) { + String tier = s.getAlias(i); + freeBytesOnTiers.put(tier, capacityBytesOnTiers + .getOrDefault(tier, 0L) + - usedBytesOnTiers.getOrDefault(tier, 0L)); + } + int numberOfBlocks = meta.getNumberOfBlocks(); + return new BlockWorkerMetrics(capacityBytes, usedBytes, capacityFree, + capacityBytesOnTiers, usedBytesOnTiers, freeBytesOnTiers, numberOfBlocks); + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java index 7741351cb0b3..513a8664f9cf 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/DefaultBlockWorker.java @@ -60,6 +60,7 @@ import alluxio.worker.grpc.GrpcExecutors; import alluxio.worker.page.PagedBlockStore; +import com.codahale.metrics.CachedGauge; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -77,6 +78,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.ThreadSafe; @@ -93,6 +95,7 @@ @NotThreadSafe public class DefaultBlockWorker extends 
AbstractWorker implements BlockWorker { private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockWorker.class); + public static final int CACHEGAUGE_UPDATE_INTERVAL = 5000; /** Used to close resources during stop. */ protected final Closer mResourceCloser = Closer.create(); @@ -526,41 +529,48 @@ public static final class Metrics { /** * Registers metric gauges. * - * @param blockWorker the block worker handle + * @param blockWorker the BlockWorker */ public static void registerGauges(final BlockWorker blockWorker) { - MetricsSystem.registerGaugeIfAbsent( + CachedGauge cache = + new CachedGauge(CACHEGAUGE_UPDATE_INTERVAL, TimeUnit.MILLISECONDS) { + @Override + protected BlockWorkerMetrics loadValue() { + BlockStoreMeta meta = blockWorker.getStoreMetaFull(); + BlockWorkerMetrics metrics = BlockWorkerMetrics.from(meta, WORKER_STORAGE_TIER_ASSOC); + return metrics; + } + }; + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_TOTAL.getName()), - () -> blockWorker.getStoreMeta().getCapacityBytes()); + () -> cache.getValue().getCapacityBytes()); - MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_USED.getName()), - () -> blockWorker.getStoreMeta().getUsedBytes()); + () -> cache.getValue().getUsedBytes()); - MetricsSystem.registerGaugeIfAbsent( + MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.WORKER_CAPACITY_FREE.getName()), - () -> blockWorker.getStoreMeta().getCapacityBytes() - blockWorker.getStoreMeta() - .getUsedBytes()); + () -> cache.getValue().getCapacityFree()); for (int i = 0; i < WORKER_STORAGE_TIER_ASSOC.size(); i++) { String tier = WORKER_STORAGE_TIER_ASSOC.getAlias(i); // TODO(lu) Add template to dynamically generate MetricKey MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_TOTAL.getName() + MetricInfo.TIER + tier), - () -> 
blockWorker.getStoreMeta().getCapacityBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getCapacityBytesOnTiers().getOrDefault(tier, 0L)); - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_USED.getName() + MetricInfo.TIER + tier), - () -> blockWorker.getStoreMeta().getUsedBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getUsedBytesOnTiers().getOrDefault(tier, 0L)); - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_CAPACITY_FREE.getName() + MetricInfo.TIER + tier), - () -> blockWorker.getStoreMeta().getCapacityBytesOnTiers().getOrDefault(tier, 0L) - - blockWorker.getStoreMeta().getUsedBytesOnTiers().getOrDefault(tier, 0L)); + () -> cache.getValue().getFreeBytesOnTiers().getOrDefault(tier, 0L)); } - MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getMetricName( + MetricsSystem.registerCachedGaugeIfAbsent(MetricsSystem.getMetricName( MetricKey.WORKER_BLOCKS_CACHED.getName()), - () -> blockWorker.getStoreMetaFull().getNumberOfBlocks()); + () -> cache.getValue().getNumberOfBlocks()); } private Metrics() {} // prevent instantiation diff --git a/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java b/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java index 0ca981e5af98..972c9e4050b3 100644 --- a/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/block/BlockWorkerMetricsTest.java @@ -52,19 +52,23 @@ public void before() throws Exception { } @Test - public void testMetricsCapacity() { + public void testMetricsCapacity() throws InterruptedException { when(mBlockStoreMeta.getCapacityBytes()).thenReturn(1000L); Assert.assertEquals(1000L, 
getGauge(MetricKey.WORKER_CAPACITY_TOTAL.getName())); when(mBlockStoreMeta.getUsedBytes()).thenReturn(200L); + // sleep 5 seconds because the timeout of this registered is CacheGauge, + // and it's update interval is 5 seconds + Thread.sleep(DefaultBlockWorker.CACHEGAUGE_UPDATE_INTERVAL); Assert.assertEquals(200L, getGauge(MetricKey.WORKER_CAPACITY_USED.getName())); Assert.assertEquals(800L, getGauge(MetricKey.WORKER_CAPACITY_FREE.getName())); } @Test - public void testMetricsTierCapacity() { + public void testMetricsTierCapacity() throws InterruptedException { when(mBlockStoreMeta.getCapacityBytesOnTiers()) .thenReturn(ImmutableMap.of(MEM, 1000L, HDD, 2000L)); when(mBlockStoreMeta.getUsedBytesOnTiers()).thenReturn(ImmutableMap.of(MEM, 100L, HDD, 200L)); + Thread.sleep(DefaultBlockWorker.CACHEGAUGE_UPDATE_INTERVAL); assertEquals(1000L, getGauge(MetricKey.WORKER_CAPACITY_TOTAL.getName() + MetricInfo.TIER + MEM)); assertEquals(2000L, From b415e022ec6f0e703418a8119e87da64d24b5b66 Mon Sep 17 00:00:00 2001 From: humengyu Date: Wed, 17 May 2023 03:16:25 +0800 Subject: [PATCH 269/334] Use local conf to create filesystem for s3proxy ### What changes are proposed in this pull request? When the filesystem establishes a connection with the master, the configuration of the filesystem will be overridden by the master's configuration, resulting in changes. Therefore, we should not use the configuration of the filesystem anymore, but instead use the configuration of the local client. ### Why are the changes needed? It is very common for the master configuration and client configuration to be different. We should avoid the newly created filesystem from being influenced by the server. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17439 change-id: cid-1af4fd37ee8c52cd32552818989dce557256b228 --- .../proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java index 5013db2a0170..e65f627c1c0d 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestUtils.java @@ -428,7 +428,9 @@ public static FileSystem createFileSystemForUser( final Subject subject = new Subject(); subject.getPrincipals().add(new User(user)); - return FileSystem.Factory.get(subject, fs.getConf()); + // Use local conf to create filesystem rather than fs.getConf() + // due to fs conf will be changed by merged cluster conf. + return FileSystem.Factory.get(subject, Configuration.global()); } /** From ca92441b717a0a6f26902c3e44a9493233bd23f6 Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Thu, 18 May 2023 02:03:32 -0700 Subject: [PATCH 270/334] [DOCFIX] Remove rolling upgrade from docs the rolling upgrade feature is for limited experimental use currently with many limitations. we may bring back the docs in a future version when it is more widely applicable to various user scenarios. pr-link: Alluxio/alluxio#17455 change-id: cid-9a65f7f7b1fa55373594f7ca3ddcc33a979addec --- docs/en/administration/Upgrade.md | 144 ------------------------------ docs/en/operation/Admin-CLI.md | 6 +- 2 files changed, 1 insertion(+), 149 deletions(-) diff --git a/docs/en/administration/Upgrade.md b/docs/en/administration/Upgrade.md index ff031df59ac4..4d35aa5fd701 100644 --- a/docs/en/administration/Upgrade.md +++ b/docs/en/administration/Upgrade.md @@ -87,150 +87,6 @@ $ ./bin/alluxio-start.sh all ``` 5. If you have updated the Alluxio client jar for an application, restart that application to use the new Alluxio client jar. 
-### Rolling upgrade/restart masters - -When the cluster is running in high-availability mode (running multiple Alluxio masters), if the admin wants to restart all masters -in the cluster, it should be done in a rolling restart fashion to minimize service unavailable time. -The service should only be unavailable during primary master failover, once there is a primary master in HA, -restarting standby masters will not interrupt the service. - -If the HA is on Embeddded Journal (using Raft), this is an example of how to perform rolling upgrade: -```shell -# First check all master nodes in the cluster -$ ./bin/alluxio fsadmin report -... -Raft journal addresses: - master-0:19200 - master-1:19200 - master-2:19200 -Master Address State Version REVISION -master-0:19998 PRIMARY alluxio-2.9.0 abcde -master-1:19998 STANDBY alluxio-2.9.0 abcde -master-2:19998 STANDBY alluxio-2.9.0 abcde - -# Pick one standby master and restart it using the higher version -$ ssh master-1 -$ bin/alluxio-start.sh master - -# Wait for that master to join the quorum and observe it is using the higher verison -$ ./bin/alluxio fsadmin report -... -Raft journal addresses: - master-0:19200 - master-1:19200 - master-2:19200 -Master Address State Version REVISION -master-0:19998 PRIMARY alluxio-2.9.0 abcde -master-1:19998 STANDBY alluxio-2.9.1 hijkl -master-2:19998 STANDBY alluxio-2.9.0 abcde - -# Do the same for the other standby master master2 - -# Manually failover the primary to one upgraded standby master, now master-0 becomes standby -$ ./bin/alluxio fsadmin journal quorum elect -address master-1:19200 - -# Restart master-0 with the higher version and wait for it to re-join the quorum -# Then you should observe all masters are on the higher version -$ ./bin/alluxio fsadmin report -... 
-Raft journal addresses: - master-0:19200 - master-1:19200 - master-2:19200 -Master Address State Version REVISION -master-0:19998 STANDBY alluxio-2.9.1 hijkl -master-1:19998 PRIMARY alluxio-2.9.1 hijkl -master-2:19998 STANDBY alluxio-2.9.1 hijkl - -# Wait for all workers register with the new primary, and run tests to validate the service -$ bin/alluxio runTests -``` - -Similarly, if the HA is on UFS Journal (using ZooKeeper), the admin can restart masters one by one in the same order. -The only difference is there is no command to manually trigger a primary master failover. The admin can -directly kill the primary master process, after a brief timeout, one standby master will realize and become the new primary. - -### Rolling upgrade/restart workers - -If the admin wants to restart workers without interrupting ongoing service, there are now ways to rolling restart -all workers without failing ongoing I/O requests. Typically, we want to restart workers to apply configuration changes, -or to upgrade to a newer version. - -A typical workflow of rolling upgrade workers looks as follows: -```shell -# First check all worker nodes in the cluster -$ ./bin/alluxio fsadmin report capacity -... -Worker Name State Last Heartbeat Storage MEM Version Revision -data-worker-1 ACTIVE 1 capacity 10.67GB 2.9.0 abcde - used 0B (0%) -data-worker-0 ACTIVE 2 capacity 10.67GB 2.9.0 abcde - used 0B (0%) -data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde - used 0B (0%) -... - -# Pick a batch of workers to decommission, e.g. 
this batch is 2 workers -$ ./bin/alluxio fsadmin decommissionWorker -a data-worker-0,data-worker-1 -w 5m -Decommissioning worker data-worker-0:30000 -Set worker data-worker-0:30000 decommissioned on master -Decommissioning worker data-worker-1:30000 -Set worker data-worker-1:30000 decommissioned on master -Sent decommission messages to the master, 0 failed and 2 succeeded -Failed ones: [] -Clients take alluxio.user.worker.list.refresh.interval=2min to be updated on the new worker list so this command will block for the same amount of time to ensure the update propagates to clients in the cluster. -Verifying the decommission has taken effect by listing all available workers on the master -Now on master the available workers are: [data-worker-2,data-worker-3,...] -Polling status from worker data-worker-0:30000 -Polling status from worker data-worker-1:30000 -... -There is no operation on worker data-worker-0:30000 for 20 times in a row. Worker is considered safe to stop. -Polling status from worker data-worker-1:30000 -There is no operation on worker data-worker-1:30000 for 20 times in a row. Worker is considered safe to stop. -Waited 3 minutes for workers to be idle -All workers are successfully decommissioned and now idle. Safe to kill or restart this batch of workers now. - -# Now you will be able to observe those workers' state have changed from ACTIVE to DECOMMISSIONED. -$ ./bin/alluxio fsadmin report capacity -... -Worker Name State Last Heartbeat Storage MEM Version Revision -data-worker-1 DECOMMISSIONED 1 capacity 10.67GB 2.9.0 abcde - used 0B (0%) -data-worker-0 DECOMMISSIONED 2 capacity 10.67GB 2.9.0 abcde - used 0B (0%) -data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde - used 0B (0%) - -# Then you can restart the decommissioned workers. The workers will start normally and join the cluster. -$ ssh data-worker-0 -$ ./bin/alluxio-start.sh worker -... 
- -# Now you will be able to observe those workers become ACTIVE again and have a higher version -$ ./bin/alluxio fsadmin report capacity -... -Worker Name State Last Heartbeat Storage MEM Version Revision -data-worker-1 ACTIVE 1 capacity 10.67GB 2.9.1 hijkl - used 0B (0%) -data-worker-0 ACTIVE 2 capacity 10.67GB 2.9.1 hijkl - used 0B (0%) -data-worker-2 ACTIVE 0 capacity 10.67GB 2.9.0 abcde - used 0B (0%) - -# You can run I/O tests against the upgraded workers to validate they are serving, before moving to upgrade the next batch -$ bin/alluxio runTests --workers data-worker-0,data-worker-1 - -# Keep performing the steps above until all workers are upgraded -``` - -See more details about the `decommissionWorker` command in -[documentation]({{ '/en/operation/Admin-CLI.html' | relativize_url }}#decommissionworker). - -### Rolling restart/upgrade other components - -Other components like the Job Master, Job Worker and Proxy do not support rolling upgrade at the moment. -The admin can manually restart them in batches. - ## Additional Options ### Alluxio worker ramdisk cache persistence diff --git a/docs/en/operation/Admin-CLI.md b/docs/en/operation/Admin-CLI.md index c31cc0c64c6a..529ed9a080b3 100644 --- a/docs/en/operation/Admin-CLI.md +++ b/docs/en/operation/Admin-CLI.md @@ -402,9 +402,7 @@ alluxio.master.metastore.inode.inherit.owner.and.group The `decommissionWorker` command can be used to take the target workers off-line from the cluster, so Alluxio clients and proxy instances stop using those workers, and therefore they can be killed or restarted gracefully. Note that this command will NOT kill worker processes. This command will NOT remove the cache on the workers. -This command can be typically used for the following use cases: -1. Perform a graceful rolling restart of all workers in the cluster, where no user requests should fail. -2. Scale down the cluster without interrupting user I/O workflow. 
+This command can be typically used to scale down the cluster without interrupting user I/O workflow. ```shell $ ./bin/alluxio fsadmin decommissionWorker --addresses data-worker-0,data-worker-1 [--wait 5m] [--disable] @@ -450,8 +448,6 @@ and after all ongoing requests have been served, those workers should not receiv Therefore, no matter when the admin restarts/kills those worker processes, that should not fail any requests. However, there are a few exceptions. See the next section for more details. -See [Rolling Upgrade Workers]({{ '/en/administration/Upgrade.html#rolling-upgraderestart-workers' | relativize_url }}) for how this command is used. - **Limitations** This has some limitations. In some cases, the `decommissionWorker` command may return code 0 (success) From 943aa24ff958fbb39c08e3a990f9a0d4b1d11b5c Mon Sep 17 00:00:00 2001 From: Shawn Sun <32376495+ssz1997@users.noreply.github.com> Date: Thu, 18 May 2023 22:57:12 -0700 Subject: [PATCH 271/334] Fix Xmx check Regarding https://github.com/Alluxio/alluxio/pull/16940#discussion_r1198367841. Add the substring "Xmx" that's supposed to be checked on. 
pr-link: Alluxio/alluxio#17459 change-id: cid-b4cc0afb6d84e7198a9fe245e994c9eb2100d0ac --- bin/launch-process | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/launch-process b/bin/launch-process index 2112406b3dc7..604915557c27 100755 --- a/bin/launch-process +++ b/bin/launch-process @@ -125,7 +125,7 @@ launch_master() { fi # use a default Xmx value for the master - local contain_xmx="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}")" + local contain_xmx="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "Xmx")" local contain_max_percentage="$(contains "${ALLUXIO_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_MASTER_JAVA_OPTS+=" -Xmx8g " @@ -144,7 +144,7 @@ launch_master() { # Launch a secondary master process launch_secondary_master() { # use a default Xmx value for the master - local contain_xmx="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}")" + local contain_xmx="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "Xmx")" local contain_max_percentage="$(contains "${ALLUXIO_SECONDARY_MASTER_JAVA_OPTS}" "MaxRAMPercentage")" if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_SECONDARY_MASTER_JAVA_OPTS+=" -Xmx8g " @@ -164,7 +164,7 @@ launch_job_master() { # Launch a worker process launch_worker() { # use a default Xmx value for the worker - local contain_xmx="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}")" + local contain_xmx="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "Xmx")" local contain_max_percentage="$(contains "${ALLUXIO_WORKER_JAVA_OPTS}" "MaxRAMPercentage")" if [[ "${contain_xmx}" -eq "0" ]] && [[ "${contain_max_percentage}" -eq "0" ]]; then ALLUXIO_WORKER_JAVA_OPTS+=" -Xmx4g " From a8c0ecd9958715215133a3ab32dbb8cb41bd1475 Mon Sep 17 00:00:00 2001 From: qian0817 Date: Fri, 19 May 2023 14:14:02 +0800 Subject: [PATCH 272/334] Avoid unnecessary config update checks ### What changes are proposed in this pull request? 
Check whether the sleep timer interval has changed when updating conf

### Why are the changes needed?

We currently generate these logs every time we execute the updateConf command, even if the value has not been modified.

```
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Cluster Metrics Updater interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Lost Worker Detection interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Worker register stream session cleaner interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Block Integrity Check interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master TTL Check interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Lost Files Detection interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Replication Check interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Persistence Scheduler interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Persistence Checker interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Metrics Time Series interval supplier.
2023-04-24 11:37:27,445 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Lost Master Detection interval supplier.
2023-04-24 11:37:27,446 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Log Config Report Scheduling interval supplier.
2023-04-24 11:37:27,446 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Journal Space Monitor interval supplier. 2023-04-24 11:37:27,446 INFO [config-hash-master-heartbeat-0](SleepingTimer.java:88) - update Master Throttle interval supplier. ``` ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#17309 change-id: cid-4352f517ca1f1b37cab9784e1f89b104f09426d8 --- .../CronExpressionIntervalSupplier.java | 21 +++++++++++++++++++ .../heartbeat/FixedIntervalSupplier.java | 19 +++++++++++++++++ .../java/alluxio/heartbeat/SleepingTimer.java | 10 ++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java index e632e472dac6..7efe522aa3ca 100644 --- a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java +++ b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java @@ -11,11 +11,13 @@ package alluxio.heartbeat; +import com.google.common.base.Preconditions; import org.apache.logging.log4j.core.util.CronExpression; import java.time.Duration; import java.time.Instant; import java.util.Date; +import java.util.Objects; /** * Calculate the next interval by given cron expression. 
@@ -31,6 +33,7 @@ public class CronExpressionIntervalSupplier implements SleepIntervalSupplier { * @param fixedInterval the fixed interval */ public CronExpressionIntervalSupplier(CronExpression cronExpression, long fixedInterval) { + Preconditions.checkNotNull(cronExpression, "CronExpression is null"); mInterval = fixedInterval; mCron = cronExpression; } @@ -56,4 +59,22 @@ public long getRunLimit(long mPreviousTickedMs) { return Duration.between(now.toInstant(), mCron.getNextInvalidTimeAfter(now).toInstant()).toMillis(); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CronExpressionIntervalSupplier that = (CronExpressionIntervalSupplier) o; + return mInterval == that.mInterval + && Objects.equals(mCron.getCronExpression(), that.mCron.getCronExpression()); + } + + @Override + public int hashCode() { + return Objects.hash(mInterval, mCron.getCronExpression()); + } } diff --git a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java index 1269f5996112..d366b6d48aef 100644 --- a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java +++ b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java @@ -14,6 +14,8 @@ import org.slf4j.Logger; import org.slf4j.helpers.NOPLogger; +import java.util.Objects; + /** * Fixed interval supplier. 
*/ @@ -60,4 +62,21 @@ public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { public long getRunLimit(long mPreviousTickedMs) { return mInterval; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + FixedIntervalSupplier that = (FixedIntervalSupplier) o; + return mInterval == that.mInterval; + } + + @Override + public int hashCode() { + return Objects.hash(mInterval); + } } diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index 2e444de5b892..0bd730d96e14 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -19,6 +19,7 @@ import java.time.Clock; import java.time.Duration; +import java.util.Objects; import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; @@ -33,7 +34,7 @@ public class SleepingTimer implements HeartbeatTimer { protected final Clock mClock; protected final Sleeper mSleeper; protected final Supplier mIntervalSupplierSupplier; - protected SleepIntervalSupplier mIntervalSupplier; + protected volatile SleepIntervalSupplier mIntervalSupplier; /** * Creates a new instance of {@link SleepingTimer}. 
@@ -84,7 +85,10 @@ public long tick() throws InterruptedException { @Override public void update() { - mIntervalSupplier = mIntervalSupplierSupplier.get(); - mLogger.info("update {} interval supplier.", mThreadName); + SleepIntervalSupplier newSupplier = mIntervalSupplierSupplier.get(); + if (!Objects.equals(mIntervalSupplier, newSupplier)) { + mIntervalSupplier = newSupplier; + mLogger.info("update {} interval supplier.", mThreadName); + } } } From 07f03eb9b7aa13a54134265e89d16da4332a2d38 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Mon, 22 May 2023 13:42:09 +0800 Subject: [PATCH 273/334] Fix CCE for metric system Fix CCE for metric system Without this fix, start a single master will encountered the following CCE. ``` 2023-05-21 21:41:27,422 INFO [main](WebServer.java:211) - Alluxio Master Web service started @ /0.0.0.0:19999 2023-05-21 21:41:27,422 INFO [main](PrimaryOnlyMetricsService.java:27) - Promoting PrimaryOnlyMetricsService 2023-05-21 21:41:27,422 INFO [main](MetricsService.java:29) - Start metric sinks. 
2023-05-21 21:41:27,425 INFO [main](AlluxioMasterProcess.java:294) - Primary started 2023-05-21 21:41:28,244 ERROR [Master Throttle](HeartbeatThread.java:157) - Uncaught exception in heartbeat executor, Heartbeat Thread shutting down java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Long at alluxio.master.throttle.ServerIndicator.createFromMetrics(ServerIndicator.java:127) at alluxio.master.throttle.SystemMonitor.collectServerIndicators(SystemMonitor.java:260) at alluxio.master.throttle.SystemMonitor.collectIndicators(SystemMonitor.java:220) at alluxio.master.throttle.SystemMonitor.run(SystemMonitor.java:211) at alluxio.master.throttle.DefaultThrottleMaster$ThrottleExecutor.heartbeat(DefaultThrottleMaster.java:147) at alluxio.heartbeat.HeartbeatThread.run(HeartbeatThread.java:152) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266) at java.util.concurrent.FutureTask.run(FutureTask.java) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` pr-link: Alluxio/alluxio#17466 change-id: cid-866a9275cc9c0b60a3d4e27d678b336c5e37ce50 --- .../main/java/alluxio/master/AlluxioExecutorService.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java index 9cfb6e36d1d6..d12f4cb5437b 100644 --- a/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java +++ b/core/server/common/src/main/java/alluxio/master/AlluxioExecutorService.java @@ -58,7 +58,7 @@ public AlluxioExecutorService(ExecutorService executor, Counter counter) { /** * @return the current RPC queue size */ - public int getRpcQueueLength() { + public long 
getRpcQueueLength() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getQueue().size(); } else if (mExecutor instanceof ForkJoinPool) { @@ -72,7 +72,7 @@ public int getRpcQueueLength() { /** * @return the current RPC active thread count */ - public int getActiveCount() { + public long getActiveCount() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getActiveCount(); } else if (mExecutor instanceof ForkJoinPool) { @@ -86,7 +86,7 @@ public int getActiveCount() { /** * @return the current RPC thread pool size */ - public int getPoolSize() { + public long getPoolSize() { if (mExecutor instanceof ThreadPoolExecutor) { return ((ThreadPoolExecutor) mExecutor).getPoolSize(); } else if (mExecutor instanceof ForkJoinPool) { From 0e39a2d5a0ae9bf8748bd68635c7c6d6e20ca6d8 Mon Sep 17 00:00:00 2001 From: secfree Date: Tue, 23 May 2023 23:33:40 +0800 Subject: [PATCH 274/334] Support changing log level at runtime for alluxio-fuse ### What changes are proposed in this pull request? Support changing log level at runtime for alluxio-fuse. Now with the configuration "alluxio.fuse.web.enabled=true", the user can change the logLevel for alluxio-fuse with the following ways 1. With `logLevel` command ``` $ ./bin/alluxio logLevel --logName=alluxio.fuse.AlluxioJniFuseFileSystem --target=localhost:49999 --level=INFO Role inferred from port: localhost:49999[fuse] localhost:49999[fuse]LogInfo{INFO, alluxio.fuse.AlluxioJniFuseFileSystem, Setting Level to INFO} ``` 2. With `curl` ``` $ curl -X POST "http://localhost:49999/api/v1/fuse/logLevel?logName=alluxio.fuse.AlluxioJniFuseFileSystem&level=DEBUG" {"logName":"alluxio.fuse.AlluxioJniFuseFileSystem","message":"Setting Level to DEBUG","level":"DEBUG"} ``` ### Why are the changes needed? Please refer #17174 ### Does this PR introduce any user facing changes? 
NO pr-link: Alluxio/alluxio#17175 change-id: cid-4130150f647217da8fd396dc542753400d6b568f --- integration/fuse/pom.xml | 16 +++++ .../fuse/AlluxioFuseRestServiceHandler.java | 59 +++++++++++++++++++ .../main/java/alluxio/fuse/FuseWebServer.java | 15 +++++ shell/src/main/java/alluxio/cli/LogLevel.java | 3 + 4 files changed, 93 insertions(+) create mode 100644 integration/fuse/src/main/java/alluxio/fuse/AlluxioFuseRestServiceHandler.java diff --git a/integration/fuse/pom.xml b/integration/fuse/pom.xml index 9363c462280e..b6b6c2205aa7 100644 --- a/integration/fuse/pom.xml +++ b/integration/fuse/pom.xml @@ -63,6 +63,22 @@ alluxio-integration-jnifuse-fs ${project.version} + + org.glassfish.jersey.containers + jersey-container-servlet-core + + + org.glassfish.jersey.core + jersey-server + + + org.glassfish.jersey.inject + jersey-hk2 + + + org.glassfish.jersey.media + jersey-media-json-jackson + diff --git a/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuseRestServiceHandler.java b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuseRestServiceHandler.java new file mode 100644 index 000000000000..69ad569523e5 --- /dev/null +++ b/integration/fuse/src/main/java/alluxio/fuse/AlluxioFuseRestServiceHandler.java @@ -0,0 +1,59 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.fuse; + +import alluxio.RestUtils; +import alluxio.conf.Configuration; +import alluxio.util.LogUtils; + +import io.swagger.annotations.Api; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +/** + * This class is a REST handler for requesting general FUSE information. + */ +@NotThreadSafe +@Api(value = "/fuse", description = "Alluxio FUSE Rest Service") +@Path(AlluxioFuseRestServiceHandler.SERVICE_PREFIX) +@Produces(MediaType.APPLICATION_JSON) +public class AlluxioFuseRestServiceHandler { + private static final Logger LOG = LoggerFactory.getLogger(AlluxioFuseRestServiceHandler.class); + + public static final String SERVICE_PREFIX = "fuse"; + + // log + public static final String LOG_LEVEL = "logLevel"; + public static final String LOG_ARGUMENT_NAME = "logName"; + public static final String LOG_ARGUMENT_LEVEL = "level"; + + /** + * @summary set the Alluxio log information + * @param logName the log's name + * @param level the log level + * @return the response object + */ + @POST + @Path(LOG_LEVEL) + public Response logLevel(@QueryParam(LOG_ARGUMENT_NAME) final String logName, + @QueryParam(LOG_ARGUMENT_LEVEL) final String level) { + return RestUtils.call(() -> LogUtils.setLogLevel(logName, level), Configuration.global()); + } +} diff --git a/integration/fuse/src/main/java/alluxio/fuse/FuseWebServer.java b/integration/fuse/src/main/java/alluxio/fuse/FuseWebServer.java index 03d24a277dbe..b47a7bfc76f7 100644 --- a/integration/fuse/src/main/java/alluxio/fuse/FuseWebServer.java +++ b/integration/fuse/src/main/java/alluxio/fuse/FuseWebServer.java @@ -11,8 +11,15 @@ package alluxio.fuse; +import alluxio.Constants; +import alluxio.util.io.PathUtils; +import 
alluxio.web.JacksonProtobufObjectMapperProvider; import alluxio.web.WebServer; +import org.eclipse.jetty.servlet.ServletHolder; +import org.glassfish.jersey.server.ResourceConfig; +import org.glassfish.jersey.servlet.ServletContainer; + import java.net.InetSocketAddress; import javax.annotation.concurrent.NotThreadSafe; @@ -29,5 +36,13 @@ public final class FuseWebServer extends WebServer { */ public FuseWebServer(String serviceName, InetSocketAddress address) { super(serviceName, address); + // REST configuration + ResourceConfig config = new ResourceConfig() + .packages("alluxio.fuse") + .register(JacksonProtobufObjectMapperProvider.class); + ServletContainer servlet = new ServletContainer(config); + ServletHolder servletHolder = new ServletHolder("Alluxio FUSE Web Service", servlet); + mServletContextHandler + .addServlet(servletHolder, PathUtils.concatPath(Constants.REST_API_PREFIX, "*")); } } diff --git a/shell/src/main/java/alluxio/cli/LogLevel.java b/shell/src/main/java/alluxio/cli/LogLevel.java index 7f2842879d8a..50471e6bb7ad 100644 --- a/shell/src/main/java/alluxio/cli/LogLevel.java +++ b/shell/src/main/java/alluxio/cli/LogLevel.java @@ -67,6 +67,7 @@ public final class LogLevel { public static final String ROLE_JOB_MASTERS = "job_masters"; public static final String ROLE_JOB_WORKER = "job_worker"; public static final String ROLE_JOB_WORKERS = "job_workers"; + public static final String ROLE_FUSE = "fuse"; public static final String TARGET_SEPARATOR = ","; public static final String TARGET_OPTION_NAME = "target"; private static final Option TARGET_OPTION = @@ -305,6 +306,8 @@ private static String inferRoleFromPort(int port, AlluxioConfiguration conf) { return ROLE_JOB_MASTER; } else if (port == NetworkAddressUtils.getPort(ServiceType.JOB_WORKER_WEB, conf)) { return ROLE_JOB_WORKER; + } else if (port == NetworkAddressUtils.getPort(ServiceType.FUSE_WEB, conf)) { + return ROLE_FUSE; } else { throw new IllegalArgumentException(String.format( "Unrecognized 
port in %s. Please make sure the port is in %s", From d23b8ca56d7c37db9ecb75672b37305150718e3c Mon Sep 17 00:00:00 2001 From: Jiacheng Liu Date: Fri, 26 May 2023 11:54:04 +0800 Subject: [PATCH 275/334] Improve monitor script printout ### What changes are proposed in this pull request? Given that the startup time is unpredictable and there is always chance that the process is simply not serving and may just start to serve eventually, mention that a double check is needed when the monitor shows the master/worker start "[FAILED]". Screen Shot 2023-05-25 at 3 28 09 PM pr-link: Alluxio/alluxio#17491 change-id: cid-4d99a8d6a918c9214778fdcd3f28d56136b6fa14 --- bin/alluxio-monitor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/alluxio-monitor.sh b/bin/alluxio-monitor.sh index ed57a5578e18..7b00017523cb 100755 --- a/bin/alluxio-monitor.sh +++ b/bin/alluxio-monitor.sh @@ -125,7 +125,7 @@ run_monitor() { else "${JAVA}" -cp ${CLASSPATH} ${alluxio_config} ${monitor_exec} if [[ $? -ne 0 ]]; then - echo -e "${WHITE}---${NC} ${RED}[ FAILED ]${NC} The ${CYAN}${node_type}${NC} @ ${PURPLE}$(hostname -f)${NC} is not serving requests.${NC}" + echo -e "${WHITE}---${NC} ${RED}[ FAILED ]${NC} The ${CYAN}${node_type}${NC} @ ${PURPLE}$(hostname -f)${NC} is not serving requests after 120s. Please check if the process is running and the logs/ if necessary.${NC}" print_node_logs "${node_type}" return 1 fi From 812855fe5040ae94bcdfb2ebd7fed233632fcf70 Mon Sep 17 00:00:00 2001 From: Bin Fan Date: Sat, 27 May 2023 02:45:28 +0800 Subject: [PATCH 276/334] Fix a bug that creates 0 byte block file mistakenly ### What changes are proposed in this pull request? Fix a bug that may create 0-byte block file on worker, when there is issue to read a file from UFS. Also fix the logging as it is too spammy when HDFSUnderFileSystem fails to read a UFS file ### Why are the changes needed? 
When we are caching a file (async), the file may somehow no longer exist on UFS (perhaps modified out of band). An exception will then be thrown from `UnderFileSystemBlockStore.createBlockReader`. In its exception handling part, we treated this case the same as a normal close and committed the temp block. This commit fixes this by aborting the temp block instead in error cases.

Besides, the exception message in `createUfsBlockReader` was constructed incorrectly by also attaching the stacktrace to errorMessage. This is also fixed.

In addition, the warn log on HDFS UFS when it fails to read a UFS file is suppressed to debug level, and only the last error is shown.

```
2023-05-17 06:43:13,039 WARN UfsInputStreamCache - Failed to create a new cached ufs instream of file id 6321787562360831 and path hdfs://nameservice1/user/hive/warehouse/some/table/period_name_desc=2023-17/period_end_date=2023-03-31/000008_0
java.util.concurrent.ExecutionException: java.io.FileNotFoundException: File does not exist: /user/hive/warehouse/some/table/period_name_desc=2023-17/period_end_date=2023-03-31/000008_0
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:66)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:56)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocationsInt(FSNamesystem.java:2168)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2138)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2049)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:583)
at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.getBlockLocations(AuthorizationProviderProxyClientProtocol.java:94)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:377)
at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2278) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2274) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2272) at com.google.common.util.concurrent.AbstractFuture.getDoneValue(AbstractFuture.java:588) at com.google.common.util.concurrent.AbstractFuture.get(AbstractFuture.java:547) at com.google.common.util.concurrent.AbstractFuture$TrustedFuture.get(AbstractFuture.java:113) at com.google.common.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:244) at com.google.common.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2317) at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2283) at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2159) at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2049) at com.google.common.cache.LocalCache.get(LocalCache.java:3966) at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4863) at alluxio.worker.block.UfsInputStreamCache.acquire(UfsInputStreamCache.java:227) at alluxio.worker.block.UnderFileSystemBlockReader.updateUnderFileSystemInputStream(UnderFileSystemBlockReader.java:373) at alluxio.worker.block.UnderFileSystemBlockReader.init(UnderFileSystemBlockReader.java:194) at alluxio.worker.block.UnderFileSystemBlockReader.create(UnderFileSystemBlockReader.java:137) at 
alluxio.worker.block.UnderFileSystemBlockStore.createBlockReader(UnderFileSystemBlockStore.java:306) at alluxio.worker.block.MonoBlockStore.createUfsBlockReader(MonoBlockStore.java:199) at alluxio.worker.block.DefaultBlockWorker.createUfsBlockReader(DefaultBlockWorker.java:413) at alluxio.worker.block.CacheRequestManager.cacheBlockFromUfs(CacheRequestManager.java:261) at alluxio.worker.block.CacheRequestManager.cacheBlock(CacheRequestManager.java:239) at alluxio.worker.block.CacheRequestManager.access$000(CacheRequestManager.java:56) at alluxio.worker.block.CacheRequestManager$CacheTask.call(CacheRequestManager.java:210) at alluxio.worker.block.CacheRequestManager$CacheTask.call(CacheRequestManager.java:164) at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) at alluxio.worker.grpc.GrpcExecutors$ImpersonateThreadPoolExecutor.lambda$execute$0(GrpcExecutors.java:159) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base/java.lang.Thread.run(Thread.java:834) ``` ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17497 change-id: cid-92e96a46cf67606c3087115cf065a8470d929421 --- .../java/alluxio/worker/block/MonoBlockStore.java | 14 +++++++++----- .../alluxio/underfs/hdfs/HdfsUnderFileSystem.java | 9 +++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 26b2757c885c..3b2f4a17e568 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -183,18 +183,18 @@ public BlockReader createUfsBlockReader(long sessionId, long blockId, long offse BlockReader reader = mUnderFileSystemBlockStore.createBlockReader(sessionId, blockId, offset, positionShort, options); BlockReader blockReader = new DelegatingBlockReader(reader, - () -> closeUfsBlock(sessionId, blockId)); + () -> closeUfsBlock(sessionId, blockId, true)); Metrics.WORKER_ACTIVE_CLIENTS.inc(); return blockReader; } catch (Exception e) { try { - closeUfsBlock(sessionId, blockId); + closeUfsBlock(sessionId, blockId, false); } catch (Exception ee) { LOG.warn("Failed to close UFS block", ee); } String errorMessage = format("Failed to read from UFS, sessionId=%d, " + "blockId=%d, offset=%d, positionShort=%s, options=%s: %s", - sessionId, blockId, offset, positionShort, options, e); + sessionId, blockId, offset, positionShort, options, e.toString()); if (e instanceof FileNotFoundException) { throw new NotFoundException(errorMessage, e); } @@ -202,13 +202,17 @@ public BlockReader createUfsBlockReader(long sessionId, long blockId, long offse } } - private void closeUfsBlock(long sessionId, long blockId) + private void closeUfsBlock(long sessionId, long blockId, boolean successful) throws IOException { try { mUnderFileSystemBlockStore.closeBlock(sessionId, blockId); Optional tempBlockMeta = 
mLocalBlockStore.getTempBlockMeta(blockId); if (tempBlockMeta.isPresent() && tempBlockMeta.get().getSessionId() == sessionId) { - commitBlock(sessionId, blockId, false); + if (successful) { + commitBlock(sessionId, blockId, false); + } else { + abortBlock(sessionId, blockId); + } } else { // When getTempBlockMeta() return null, such as a block readType NO_CACHE writeType THROUGH. // Counter will not be decrement in the commitblock(). diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java index 2c876fa0d979..aa672d4dd248 100755 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java @@ -684,7 +684,7 @@ public InputStream open(String path, OpenOptions options) throws IOException { LOG.debug("Using original API to HDFS"); return new HdfsUnderFileInputStream(inputStream); } catch (IOException e) { - LOG.warn("{} try to open {} : {}", retryPolicy.getAttemptCount(), path, e.toString()); + LOG.debug("{} try to open {} : {}", retryPolicy.getAttemptCount(), path, e.toString()); te = e; if (options.getRecoverFailedOpen() && dfs != null && e.getMessage().toLowerCase() .startsWith("cannot obtain block length for")) { @@ -711,7 +711,12 @@ public InputStream open(String path, OpenOptions options) throws IOException { } } } - throw te; + if (te != null) { + LOG.error("{} failed attempts to open \"{}\" with last error:", + retryPolicy.getAttemptCount(), path, te); + throw te; + } + throw new IllegalStateException("Exceeded the number of retry attempts with no exception"); } @Override From 9d2f9cfc3ad94b9e8cc3a9db0a97f171c13c4699 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sat, 27 May 2023 06:03:20 +0800 Subject: [PATCH 277/334] Cleanup HADOOP1 support As some people are confused about why we encountered some build error while running alluxio tests. 
I will suggest them to remove the HADOOP1 related code to pass the build. pr-link: Alluxio/alluxio#17469 change-id: cid-1dde372d4a0aa0604b44ddbbe90762293f31c20c --- integration/tools/pom.xml | 28 ---------------- tests/pom.xml | 32 ------------------- .../FileSystemRenameIntegrationTest.java | 6 ---- underfs/hdfs/pom.xml | 28 ---------------- .../hdfs/HdfsUnderFileOutputStream.java | 4 --- .../underfs/hdfs/HdfsUnderFileSystem.java | 12 ------- 6 files changed, 110 deletions(-) diff --git a/integration/tools/pom.xml b/integration/tools/pom.xml index 891f6777458f..fe8918feea72 100644 --- a/integration/tools/pom.xml +++ b/integration/tools/pom.xml @@ -47,34 +47,6 @@ - - - - ufs-hadoop-1 - - - org.alluxio - alluxio-shaded-hadoop - ${ufs.hadoop.version} - - - - - - com.igormaznitsa - jcp - - - - HADOOP1 - - - - - - - - diff --git a/tests/pom.xml b/tests/pom.xml index b42c551c41ee..6bd401c2a058 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -243,38 +243,6 @@ - - - - - hadoop-1 - - - org.apache.hadoop - hadoop-core - ${hadoop.version} - provided - - - - - - com.igormaznitsa - jcp - - - - HADOOP1 - - - - - - - - diff --git a/tests/src/test/java/alluxio/client/hadoop/FileSystemRenameIntegrationTest.java b/tests/src/test/java/alluxio/client/hadoop/FileSystemRenameIntegrationTest.java index df11400cfbfe..3665df4dac4e 100644 --- a/tests/src/test/java/alluxio/client/hadoop/FileSystemRenameIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/hadoop/FileSystemRenameIntegrationTest.java @@ -233,13 +233,7 @@ public void basicRenameTest7() throws Exception { sTFS.mkdirs(dirA); FSDataOutputStream o = sTFS.create(fileA); o.writeBytes("Test Bytes"); - // Due to Hadoop 1 support we stick with the deprecated version. If we drop support for it - // FSDataOutputStream.hflush will be the new one. 
- //#ifdef HADOOP1 - o.sync(); - //#else o.hflush(); - //#endif Assert.assertTrue(sTFS.rename(dirA, dirB)); diff --git a/underfs/hdfs/pom.xml b/underfs/hdfs/pom.xml index fc3d322ae6d0..f18eda19ede5 100644 --- a/underfs/hdfs/pom.xml +++ b/underfs/hdfs/pom.xml @@ -57,34 +57,6 @@ - - - - ufs-hadoop-1 - - - org.alluxio - alluxio-shaded-hadoop - ${ufs.hadoop.version} - - - - - - com.igormaznitsa - jcp - - - - HADOOP1 - - - - - - - - diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java index 6bf3159a5bac..69546bada127 100644 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileOutputStream.java @@ -58,13 +58,9 @@ public void close() throws IOException { @Override public void flush() throws IOException { // TODO(calvin): This functionality should be restricted to select output streams. - //#ifdef HADOOP1 - mOut.sync(); - //#else // Note that, hsync() flushes out the data in client's user buffer all the way to the disk // device which may result in much slower performance than sync(). 
mOut.hsync(); - //#endif } @Override diff --git a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java index aa672d4dd248..f5c435d2f470 100755 --- a/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java +++ b/underfs/hdfs/src/main/java/alluxio/underfs/hdfs/HdfsUnderFileSystem.java @@ -466,25 +466,13 @@ public long getSpace(String path, SpaceType type) throws IOException { // Hadoop 2 and removed in Hadoop 3 switch (type) { case SPACE_TOTAL: - //#ifdef HADOOP1 - space = ((DistributedFileSystem) hdfs).getDiskStatus().getCapacity(); - //#else space = hdfs.getStatus().getCapacity(); - //#endif break; case SPACE_USED: - //#ifdef HADOOP1 - space = ((DistributedFileSystem) hdfs).getDiskStatus().getDfsUsed(); - //#else space = hdfs.getStatus().getUsed(); - //#endif break; case SPACE_FREE: - //#ifdef HADOOP1 - space = ((DistributedFileSystem) hdfs).getDiskStatus().getRemaining(); - //#else space = hdfs.getStatus().getRemaining(); - //#endif break; default: throw new IOException("Unknown space type: " + type); From 68ac842717a8d3d7339d1a2b943d9967e0d73a96 Mon Sep 17 00:00:00 2001 From: secfree Date: Wed, 31 May 2023 00:40:57 +0800 Subject: [PATCH 278/334] Call needsCache in LocalCacheFileSystem ### What changes are proposed in this pull request? Check if the `uri` needs cache in `alluxio.client.file.cache.LocalCacheFileSystem` ### Why are the changes needed? `needsCache` is checked in `alluxio.hadoop.LocalCacheFileSystem` but not `alluxio.client.file.cache.LocalCacheFileSystem`. However, it is `alluxio.client.file.cache.LocalCacheFileSystem` used by local cache. I also checked the code and cannot find a place is using `alluxio.hadoop.LocalCacheFileSystem` in Alluxio. Should it be deleted from code or it may be used by some clients? ### Does this PR introduce any user facing changes? 
NO pr-link: Alluxio/alluxio#17512 change-id: cid-b39101ac6bbf65b4f2ad91ed2c165b114ba92309 --- .../alluxio/client/file/cache/LocalCacheFileSystem.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java index a54ef7eaacc8..ed541972ce6b 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileSystem.java @@ -16,6 +16,7 @@ import alluxio.client.file.FileInStream; import alluxio.client.file.FileSystem; import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.filter.CacheFilter; import alluxio.conf.AlluxioConfiguration; import alluxio.exception.AlluxioException; import alluxio.grpc.OpenFilePOptions; @@ -32,6 +33,7 @@ public class LocalCacheFileSystem extends DelegatingFileSystem { private static final Logger LOG = LoggerFactory.getLogger(LocalCacheFileSystem.class); private final CacheManager mCacheManager; + private final CacheFilter mCacheFilter; private final AlluxioConfiguration mConf; /** @@ -43,6 +45,7 @@ public LocalCacheFileSystem(CacheManager cacheManage, FileSystem fs, AlluxioConf super(fs); mCacheManager = Preconditions.checkNotNull(cacheManage, "cacheManager"); mConf = Preconditions.checkNotNull(conf, "conf"); + mCacheFilter = CacheFilter.create(conf); } @Override @@ -62,7 +65,8 @@ public FileInStream openFile(AlluxioURI path, OpenFilePOptions options) @Override public FileInStream openFile(URIStatus status, OpenFilePOptions options) throws IOException, AlluxioException { - if (mCacheManager == null || mCacheManager.state() == CacheManager.State.NOT_IN_USE) { + if (mCacheManager == null || mCacheManager.state() == CacheManager.State.NOT_IN_USE + || !mCacheFilter.needsCache(status)) { return mDelegatedFileSystem.openFile(status, options); } return new 
LocalCacheFileInStream(status, From 81b871604818990a498ac42a7bea8a8d7060e2e3 Mon Sep 17 00:00:00 2001 From: elega <445092967@qq.com> Date: Wed, 31 May 2023 15:40:58 +0800 Subject: [PATCH 279/334] Remove unnecessary inode.isComplete() check from completeFile() ### What changes are proposed in this pull request? This PR fixes https://github.com/Alluxio/alluxio/issues/17083 This PR moved the inode completion check from scheduleAsyncPersistenceInternal() method to scheduleAsyncPersistence() . So we no longer unintentionally check `file.isComplete()` in `completeFile()` and throw and exception. ### Why are the changes needed? The inode model fetched from RockDB might become stale because the model is in memory and won't reflect the recent rocksDB change. Such stale in-memory model might fail some checks unexpectedly. ### Does this PR introduce any user facing changes? N/A pr-link: Alluxio/alluxio#17145 change-id: cid-8d45a29ea286a8dbc0e4de4f30ef3e2f315df279 --- .../master/file/DefaultFileSystemMaster.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index ac8230977032..f640b854af59 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -1702,7 +1702,7 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) } // Even readonly mount points should be able to complete a file, for UFS reads in CACHE mode. completeFileInternal(rpcContext, inodePath, context); - // Schedule async persistence if requested. + // Inode completion check is skipped because we know the file we completed is complete. 
if (context.getOptions().hasAsyncPersistOptions()) { scheduleAsyncPersistenceInternal(inodePath, ScheduleAsyncPersistenceContext .create(context.getOptions().getAsyncPersistOptionsBuilder()), rpcContext); @@ -4040,18 +4040,31 @@ public void scheduleAsyncPersistence(AlluxioURI path, ScheduleAsyncPersistenceCo mInodeTree .lockFullInodePath(path, LockPattern.WRITE_INODE, rpcContext.getJournalContext()) ) { + InodeFile inode = inodePath.getInodeFile(); + if (!inode.isCompleted()) { + throw new InvalidPathException( + "Cannot persist an incomplete Alluxio file: " + inodePath.getUri()); + } scheduleAsyncPersistenceInternal(inodePath, context, rpcContext); } } + /** + * Persists an inode asynchronously. + * This method does not do the completion check. When this method is invoked, + * please make sure the inode has been completed. + * Currently, two places call this method. One is completeFile(), where we know that + * the file is completed. Another place is scheduleAsyncPersistence(), where we check + * if the inode is completed and throws an exception if it is not. 
+ * @param inodePath the locked inode path + * @param context the context + * @param rpcContext the rpc context + * @throws FileDoesNotExistException if the file does not exist + */ private void scheduleAsyncPersistenceInternal(LockedInodePath inodePath, ScheduleAsyncPersistenceContext context, RpcContext rpcContext) - throws InvalidPathException, FileDoesNotExistException { + throws FileDoesNotExistException { InodeFile inode = inodePath.getInodeFile(); - if (!inode.isCompleted()) { - throw new InvalidPathException( - "Cannot persist an incomplete Alluxio file: " + inodePath.getUri()); - } if (shouldPersistPath(inodePath.toString())) { mInodeTree.updateInode(rpcContext, UpdateInodeEntry.newBuilder().setId(inode.getId()) .setPersistenceState(PersistenceState.TO_BE_PERSISTED.name()).build()); From 4a57bab5a96ff34f7eaa5eca196157e49d905665 Mon Sep 17 00:00:00 2001 From: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> Date: Wed, 31 May 2023 14:06:12 -0700 Subject: [PATCH 280/334] Deprecate some Raft journal RPCs Following #16998, some messages, fields, and RPCs are no longer in use. This PR marks them as deprecated. 
pr-link: Alluxio/alluxio#17358 change-id: cid-d69c86f931511b5c85a55a419a9399ba6d9718f9 --- .../src/main/proto/grpc/raft_journal.proto | 40 +++-- core/transport/src/main/proto/proto.lock | 154 ++++++++++++++++-- 2 files changed, 165 insertions(+), 29 deletions(-) diff --git a/core/transport/src/main/proto/grpc/raft_journal.proto b/core/transport/src/main/proto/grpc/raft_journal.proto index 09168749e17c..dee7f92cddbe 100644 --- a/core/transport/src/main/proto/grpc/raft_journal.proto +++ b/core/transport/src/main/proto/grpc/raft_journal.proto @@ -9,13 +9,14 @@ package alluxio.grpc.meta; import "grpc/common.proto"; message JournalQueryRequest { - optional GetSnapshotInfoRequest snapshotInfoRequest = 1; - optional GetSnapshotRequest snapshotRequest = 2; + optional GetSnapshotInfoRequest snapshotInfoRequest = 1 [deprecated = true]; + optional GetSnapshotRequest snapshotRequest = 2 [deprecated = true]; optional AddQuorumServerRequest addQuorumServerRequest = 3; } message JournalQueryResponse { - optional GetSnapshotInfoResponse snapshotInfoResponse = 1; + option deprecated = true; + optional GetSnapshotInfoResponse snapshotInfoResponse = 1 [deprecated = true]; } message AddQuorumServerRequest { @@ -23,14 +24,17 @@ message AddQuorumServerRequest { } message GetSnapshotInfoRequest { - optional SnapshotMetadata snapshotInfo = 1; + option deprecated = true; + optional SnapshotMetadata snapshotInfo = 1 [deprecated = true]; } message GetSnapshotInfoResponse { - optional SnapshotMetadata latest = 1; + option deprecated = true; + optional SnapshotMetadata latest = 1 [deprecated = true]; } message GetSnapshotRequest { + option deprecated = true; } message SnapshotMetadata { @@ -43,24 +47,28 @@ message SnapshotData { optional int64 snapshotTerm = 1; optional int64 snapshotIndex = 2; optional bytes chunk = 3; - optional int64 offset = 4; - optional bool eof = 5; + optional int64 offset = 4 [deprecated = true]; + optional bool eof = 5 [deprecated = true]; } message 
UploadSnapshotPRequest { - optional SnapshotData data = 1; + option deprecated = true; + optional SnapshotData data = 1 [deprecated = true]; } message UploadSnapshotPResponse { - optional int64 offsetReceived = 1; + option deprecated = true; + optional int64 offsetReceived = 1 [deprecated = true]; } message DownloadSnapshotPRequest { - optional int64 offsetReceived = 1; + option deprecated = true; + optional int64 offsetReceived = 1 [deprecated = true]; } message DownloadSnapshotPResponse { - optional SnapshotData data = 1; + option deprecated = true; + optional SnapshotData data = 1 [deprecated = true]; } message LatestSnapshotInfoPRequest {} @@ -71,16 +79,18 @@ message LatestSnapshotInfoPRequest {} service RaftJournalService { /** - * Deprecated. * Uploads a snapshot to primary master. */ - rpc UploadSnapshot (stream UploadSnapshotPRequest) returns (stream UploadSnapshotPResponse); + rpc UploadSnapshot (stream UploadSnapshotPRequest) returns (stream UploadSnapshotPResponse) { + option deprecated = true; + }; /** - * Deprecated. * Downloads a snapshot from primary master. */ - rpc DownloadSnapshot (stream DownloadSnapshotPRequest) returns (stream DownloadSnapshotPResponse); + rpc DownloadSnapshot (stream DownloadSnapshotPRequest) returns (stream DownloadSnapshotPResponse) { + option deprecated = true; + }; /** * Requests information about snapshots on a particular machine. 
diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 78251d3512cf..8dfbdb55129a 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -7196,12 +7196,24 @@ { "id": 1, "name": "snapshotInfoRequest", - "type": "GetSnapshotInfoRequest" + "type": "GetSnapshotInfoRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 2, "name": "snapshotRequest", - "type": "GetSnapshotRequest" + "type": "GetSnapshotRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 3, @@ -7216,7 +7228,19 @@ { "id": 1, "name": "snapshotInfoResponse", - "type": "GetSnapshotInfoResponse" + "type": "GetSnapshotInfoResponse", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7236,7 +7260,19 @@ { "id": 1, "name": "snapshotInfo", - "type": "SnapshotMetadata" + "type": "SnapshotMetadata", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7246,12 +7282,30 @@ { "id": 1, "name": "latest", - "type": "SnapshotMetadata" + "type": "SnapshotMetadata", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, { - "name": "GetSnapshotRequest" + "name": "GetSnapshotRequest", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "name": "SnapshotMetadata", @@ -7294,12 +7348,24 @@ { "id": 4, "name": "offset", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "id": 5, "name": "eof", - "type": "bool" + "type": "bool", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] } ] }, @@ -7309,7 +7375,19 @@ { "id": 1, "name": "data", - "type": "SnapshotData" + "type": "SnapshotData", + 
"options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7319,7 +7397,19 @@ { "id": 1, "name": "offsetReceived", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7329,7 +7419,19 @@ { "id": 1, "name": "offsetReceived", - "type": "int64" + "type": "int64", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7339,7 +7441,19 @@ { "id": 1, "name": "data", - "type": "SnapshotData" + "type": "SnapshotData", + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] + } + ], + "options": [ + { + "name": "deprecated", + "value": "true" } ] }, @@ -7356,14 +7470,26 @@ "in_type": "UploadSnapshotPRequest", "out_type": "UploadSnapshotPResponse", "in_streamed": true, - "out_streamed": true + "out_streamed": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "name": "DownloadSnapshot", "in_type": "DownloadSnapshotPRequest", "out_type": "DownloadSnapshotPResponse", "in_streamed": true, - "out_streamed": true + "out_streamed": true, + "options": [ + { + "name": "deprecated", + "value": "true" + } + ] }, { "name": "RequestLatestSnapshotInfo", From 93ef64a8c14da1835877311522691691896f9b77 Mon Sep 17 00:00:00 2001 From: secfree Date: Mon, 5 Jun 2023 14:36:28 +0800 Subject: [PATCH 281/334] Fix LocalPageStore NPE ### What changes are proposed in this pull request? Fix LocalPageStore NPE. ### Why are the changes needed? 
I encountered the following exception while using local cache ``` 2023-05-31T17:25:13.453+0800 ERROR 20230531_092513_00010_uqx2a.1.0.0-7-153 alluxio.client.file.cache.NoExceptionCacheManager Failed to put page PageId{FileId=76f9c79d5d43c725de31295c263291e0, PageIndex=534}, cacheContext CacheContext{cacheIdentifier=null, cacheQuota=alluxio.client.quota.CacheQuota$1@1f, cacheScope=CacheScope{id=.}, hiveCacheContext=null, isTemporary=false} java.lang.NullPointerException: Cannot invoke "String.contains(java.lang.CharSequence)" because the return value of "java.lang.Exception.getMessage()" is null at alluxio.client.file.cache.store.LocalPageStore.put(LocalPageStore.java:80) at alluxio.client.file.cache.LocalCacheManager.putAttempt(LocalCacheManager.java:345) at alluxio.client.file.cache.LocalCacheManager.putInternal(LocalCacheManager.java:274) at alluxio.client.file.cache.LocalCacheManager.put(LocalCacheManager.java:234) at alluxio.client.file.cache.CacheManagerWithShadowCache.put(CacheManagerWithShadowCache.java:52) at alluxio.client.file.cache.NoExceptionCacheManager.put(NoExceptionCacheManager.java:55) at alluxio.client.file.cache.CacheManager.put(CacheManager.java:196) at alluxio.client.file.cache.LocalCacheFileInStream.localCachedRead(LocalCacheFileInStream.java:218) at alluxio.client.file.cache.LocalCacheFileInStream.bufferedRead(LocalCacheFileInStream.java:144) at alluxio.client.file.cache.LocalCacheFileInStream.readInternal(LocalCacheFileInStream.java:242) at alluxio.client.file.cache.LocalCacheFileInStream.positionedRead(LocalCacheFileInStream.java:287) at alluxio.hadoop.HdfsFileInputStream.read(HdfsFileInputStream.java:153) at alluxio.hadoop.HdfsFileInputStream.readFully(HdfsFileInputStream.java:170) at org.apache.hadoop.fs.FSDataInputStream.readFully(FSDataInputStream.java:111) at io.trino.filesystem.hdfs.HdfsInput.readFully(HdfsInput.java:42) at io.trino.plugin.hive.parquet.TrinoParquetDataSource.readInternal(TrinoParquetDataSource.java:64) at 
io.trino.parquet.AbstractParquetDataSource.readFully(AbstractParquetDataSource.java:120) at io.trino.parquet.AbstractParquetDataSource$ReferenceCountedReader.read(AbstractParquetDataSource.java:330) at io.trino.parquet.ChunkReader.readUnchecked(ChunkReader.java:31) at io.trino.parquet.reader.ChunkedInputStream.readNextChunk(ChunkedInputStream.java:149) at io.trino.parquet.reader.ChunkedInputStream.read(ChunkedInputStream.java:93) ``` ### Does this PR introduce any user facing changes? NO pr-link: Alluxio/alluxio#17552 change-id: cid-f35c64b837748c1d46ba7092dae5ad6ef5003bb7 --- .../java/alluxio/client/file/cache/store/LocalPageStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java index 923c4d653c41..446c9e5e5a95 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/LocalPageStore.java @@ -77,11 +77,11 @@ public void put(PageId pageId, } } catch (Exception e) { Files.deleteIfExists(pagePath); - if (e.getMessage().contains(ERROR_NO_SPACE_LEFT)) { + if (e.getMessage() != null && e.getMessage().contains(ERROR_NO_SPACE_LEFT)) { throw new ResourceExhaustedException( String.format("%s is full, configured with %d bytes", mRoot, mCapacity), e); } - throw new IOException("Failed to write file " + pagePath + " for page " + pageId); + throw new IOException("Failed to write file " + pagePath + " for page " + pageId, e); } } From 9813817b2eb31998cfebb9495776fc508d550a82 Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 8 Jun 2023 12:06:07 +0800 Subject: [PATCH 282/334] [DOCFIX]Update cn version of docs/_data/table/cn/master-metrics.yml ### What changes are proposed in this pull request? [DOCFIX]Update cn version of docs/_data/table/cn/master-metrics.yml ### Why are the changes needed? 
The Chinese docs/_data/table/cn/master-metrics.yml doc is problematic: the description of Master.CreateDirectoryOps is wrong. This PR synchronizes these updates. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. pr-link: Alluxio/alluxio#17011 change-id: cid-15f7792d87a6a9c2d537ce5ff82f697dc9b38117 --- docs/_data/table/cn/master-metrics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_data/table/cn/master-metrics.yml b/docs/_data/table/cn/master-metrics.yml index 761ef8319034..9b4e06e5b5e1 100644 --- a/docs/_data/table/cn/master-metrics.yml +++ b/docs/_data/table/cn/master-metrics.yml @@ -27,7 +27,7 @@ Master.CompleteFileOps: Master.CompletedOperationRetryCount: '已由客户端重试的完成操作总数' Master.CreateDirectoryOps: - '这个指标通过汇总 Master.RocksBlockEstimatedMemUsage 和 Master.RocksInodeEstimatedMemUsage 的值,给出了 RocksDB 内存使用总量的估计值' + 'CreateDirectory 操作的总数' Master.CreateFileOps: 'CreateFile 操作的总数' Master.DeletePathOps: From 2920169048cb294e1d283ea2d5a7047d7ff121a3 Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 8 Jun 2023 12:06:43 +0800 Subject: [PATCH 283/334] [DOCFIX] Update cn version of OSS doc ### What changes are proposed in this pull request? [DOCFIX] Update cn version of OSS doc ### Why are the changes needed? The Chinese ufs/OSS doc is not updated with the latest changes, this PR synchronizes these updates. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. 
pr-link: Alluxio/alluxio#16932 change-id: cid-fcfee42cb97534141c02c74b8011f31f015dce17 --- docs/cn/ufs/OSS.md | 76 ++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/docs/cn/ufs/OSS.md b/docs/cn/ufs/OSS.md index 4ca4e37aaddd..e3e38da1783b 100644 --- a/docs/cn/ufs/OSS.md +++ b/docs/cn/ufs/OSS.md @@ -1,35 +1,38 @@ --- layout: global -title: Alluxio集成OSS作为底层存储 -nickname: Alluxio集成OSS作为底层存储 +title: Aliyun Object Storage Service +nickname: 阿里云对象存储服务 group: Storage Integrations -priority: 5 +priority: 10 --- -* 内容列表 +* Table of Contents {:toc} -该指南介绍如何配置Alluxio以使用[Aliyun OSS](http://www.aliyun.com/product/oss/?lang=en)作为底层文件系统。对象存储服务(OSS)是阿里云提供的一个大容量、安全、高可靠性的云存储服务。 +本指南介绍了如何配置[阿里云 OSS](https://intl.aliyun.com/product/oss) 将其作为Alluxio 的底层存储系统。对象存储服务(Object Storage Service, OSS)是阿里云提供的海量、安全、高可靠的云存储服务。 -## 初始步骤 +## 部署条件 -要在许多机器上运行Alluxio集群,需要在这些机器上部署二进制包。你可以自己[编译Alluxio](Building-Alluxio-From-Source.html),或者[下载二进制包](Running-Alluxio-Locally.html) +电脑上应已安装好 Alluxio 程序。如果没有安装,可[编译Alluxio源代码]({{ '/cn/contributor/Building-Alluxio-From-Source.html' | relativize_url }}), 或在[本地下载Alluxio程序]({{ '/cn/deploy/Running-Alluxio-Locally.html' | relativize_url }}). 
-另外,为了在OSS上使用Alluxio,需要创建一个bucket(或者使用一个已有的bucket)。还要注意在该bucket里使用的目录,可以在该bucket中新建一个目录,或者使用一个存在的目录。在该指南中,OSS bucket的名称为OSS_BUCKET,在该bucket里的目录名称为OSS_DIRECTORY。另外,要使用OSS服务,还需提供一个oss 端点,该端点指定了你的bucket在哪个范围,本向导中的端点名为OSS_ENDPOINT。要了解更多指定范围的端点的具体内容,可以参考[这里](http://intl.aliyun.com/docs#/pub/oss_en_us/product-documentation/domain-region),要了解更多OSS Bucket的信息,请参考[这里](http://intl.aliyun.com/docs#/pub/oss_en_us/product-documentation/function&bucket) +在将 OSS 与 Alluxio 一起运行前,请参照 [OSS 快速上手指南](https://www.alibabacloud.com/help/doc-detail/31883.htm)注册 OSS 或创建一个 OSS bucket。 -## 安装OSS -Alluxio通过[统一命名空间](Unified-and-Transparent-Namespace.html)统一访问不同存储系统。 OSS的安装位置可以在Alluxio命名空间的根目录或嵌套目录下。 +## 基本设置 -### 根目录安装 - -若要在Alluxio中使用OSS作为底层文件系统,一定要修改`conf/alluxio-site.properties`配置文件。首先要指定一个已有的OSS bucket和其中的目录作为底层文件系统,可以在`conf/alluxio-site.properties`中添加如下语句指定它: +如果要使用OSS作为 Alluxio的底层存储,需要通过修改 `conf/alluxio-site.properties` 来配置Alluxio。如果该配置文件不存在,可通过模板创建。 ``` -alluxio.master.mount.table.root.ufs=oss://// +$ cp conf/alluxio-site.properties.template conf/alluxio-site.properties +``` + +编辑 `conf/alluxio-site.properties` 文件,将底层存储地址设置为 OSS bucket 和要挂载到 Alluxio 的 OSS 目录。例如,如果要将整个 bucket 挂载到 Alluxio,底层存储地址可以是 `oss://alluxio-bucket/` ,如果将名为 alluxio-bucket、目录为 `/alluxio/data` 的 OSS bucket 挂载到 Alluxio,则底层存储地址为 `oss://alluxio-bucket/alluxio/data`。 + ``` +alluxio.master.mount.table.root.ufs=oss:/// +``` -接着,需要指定Aliyun证书以便访问OSS,在`conf/alluxio-site.properties`中添加: +指定访问 OSS 的阿里云凭证。在 `conf/alluxio-site.properties` 中,添加: ``` fs.oss.accessKeyId= @@ -37,45 +40,46 @@ fs.oss.accessKeySecret= fs.oss.endpoint= ``` -此处, `fs.oss.accessKeyId `和`fs.oss.accessKeySecret`分别为`Access Key ID`字符串和`Access Key Secret`字符串,均受阿里云[AccessKeys管理界面](https://ak-console.aliyun.com)管理;`fs.oss.endpoint`是Bucket概述中所说的Bucket的endpoint,其可能的取值比如`oss-us-west-1.aliyuncs.com `,`oss-cn-shanghai.aliyuncs.com`。 -([OSS Internet Endpoint](https://intl.aliyun.com/help/doc-detail/31837.htm))。 - 
-更改完成后,Alluxio应该能够将OSS作为底层文件系统运行,你可以尝试[使用OSS在本地运行Alluxio](#使用OSS在本地运行Alluxio) - -### 嵌套目录安装 +`fs.oss.accessKeyId` 和 `fs.oss.accessKeySecret` 是 OSS 的 [AccessKey](https://www.alibabacloud.com/help/doc-detail/29009.htm), 由[阿里云AccessKey管理工作台](https://ram.console.aliyun.com/)创建和管理。 -OSS可以安装在Alluxio命名空间中的嵌套目录中,以统一访问多个存储系统。 -[Mount 命令]({{ '/cn/operation/User-CLI.html' | relativize_url }}#mount)可以实现这一目的。例如,下面的命令将OSS容器内部的目录挂载到Alluxio的`/oss`目录 +`fs.oss.endpoint` 是这个bucket的网络端点 (endpoint),见 bucket 概览页面,包含如 `oss-us-west-1.aliyuncs.com` 和 `oss-cn-shanghai.aliyuncs.com` 这样的值。可用的 endpoint 清单见 +[OSS网络端点文档](https://intl.aliyun.com/help/doc-detail/31837.htm). -```console -$ ./bin/alluxio fs mount --option fs.oss.accessKeyId= \ - --option fs.oss.accessKeySecret= \ - --option fs.oss.endpoint= \ - /oss oss://// -``` +## 示例:将 Alluxio 与 OSS 一起在本地运行 -## 使用OSS在本地运行Alluxio - -配置完成后,你可以在本地启动Alluxio,观察一切是否正常运行: +启动 Alluxio 服务器: ```console $ ./bin/alluxio format $ ./bin/alluxio-start.sh local ``` -该命令应当会启动一个Alluxio master和一个Alluxio worker,可以在浏览器中访问[http://localhost:19999](http://localhost:19999)查看master UI。 +该命令会启动一个 Alluxio master 和一个 Alluxio worker。可通过 [http://localhost:19999](http://localhost:19999) 查看 master UI。 -接着,你可以运行一个简单的示例程序: +运行一个简单的示例程序: ```console $ ./bin/alluxio runTests ``` -运行成功后,访问你的OSS目录`oss:///`,确认其中包含了由Alluxio创建的文件和目录。在该测试中,创建的文件名称应像`OSS_BUCKET/OSS_DIRECTORY/default_tests_files/BasicFile_CACHE_PROMOTE_MUST_CACHE`这样。。 +访问 OSS 的目录 `oss:///` 以验证 Alluxio 创建的文件和目录是否存在。就本次测试而言,将看到如下的文件:`//default_tests_files/BasicFile_CACHE_PROMOTE_MUST_CACHE`. 
-运行以下命令停止Alluxio: +运行以下命令终止 Alluxio: ```console $ ./bin/alluxio-stop.sh local ``` +## 高级设置 + +### 嵌套挂载 + +OSS 存储位置可以挂载在 Alluxio 命名空间中的嵌套目录下,以便统一访问多个底层存储系统。可使用 Alluxio 的 +[Mount Command]({{ '/cn/operation/User-CLI.html' | relativize_url }}#mount)(挂载命令)来进行挂载。例如:下述命令将 OSS bucket 里的一个目录挂载到 Alluxio 目录 `/oss`: + +```console +$ ./bin/alluxio fs mount --option fs.oss.accessKeyId= \ + --option fs.oss.accessKeySecret= \ + --option fs.oss.endpoint= \ + /oss oss://// +``` From dbf41384233d5b82238ad8f1d6b653a311d6d5f8 Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 8 Jun 2023 12:07:13 +0800 Subject: [PATCH 284/334] [DOCFIX] Update cn version of Azure-Data-Lake-Gen2 doc ### What changes are proposed in this pull request? [DOCFIX] Update cn version of Azure-Data-Lake-Gen2 doc ### Why are the changes needed? The Chinese ufs/Azure-Data-Lake-Gen2 doc is not updated with the latest changes, this PR synchronizes these updates. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. 
pr-link: Alluxio/alluxio#16929 change-id: cid-f0c0ac2323fda070c69a1a1b4ea0d7be76c4f0f3 --- docs/cn/ufs/Azure-Data-Lake-Gen2.md | 159 ++++++++++++++++++++++++++++ docs/cn/ufs/Azure-Data-Lake.md | 23 ++-- 2 files changed, 167 insertions(+), 15 deletions(-) create mode 100644 docs/cn/ufs/Azure-Data-Lake-Gen2.md diff --git a/docs/cn/ufs/Azure-Data-Lake-Gen2.md b/docs/cn/ufs/Azure-Data-Lake-Gen2.md new file mode 100644 index 000000000000..558e070f5800 --- /dev/null +++ b/docs/cn/ufs/Azure-Data-Lake-Gen2.md @@ -0,0 +1,159 @@ +--- +layout: global +title: Azure Storage Gen2 +nickname: Azure Data Lake Storage Gen2 +group: Storage Integrations +priority: 2 +--- + +* Table of Contents +{:toc} + +本指南介绍如何配置 Alluxio,使其与底层存储系统 [Azure Data Lake Storage Gen2](https://learn.microsoft.com/zh-cn/azure/storage/blobs/data-lake-storage-introduction) 一起运行。 + +## 部署条件 + +电脑上应已安装好 Alluxio 程序。如果没有安装,可[编译Alluxio源代码]({{ '/cn/contributor/Building-Alluxio-From-Source.html' | relativize_url }}), +或直接 [下载已编译好的Alluxio程序]({{ '/cn/deploy/Running-Alluxio-Locally.html' | relativize_url }}). + +在将 Azure 数据湖存储与 Alluxio 一起运行前,请在 [Azure 帐户中创建一个新的 Data Lake Storage](https://learn.microsoft.com/zh-cn/azure/storage/blobs/create-data-lake-storage-account) 或使用现有的 Data Lake Storage。这里还应指定需使用的 directory(目录),创建一个新的目录或使用现有目录均可。此外,还需要一个[共享密钥](https://learn.microsoft.com/zh-cn/rest/api/storageservices/authorize-with-shared-key)。 +本指南中的 Azure 存储帐户名为 ``,该存储帐户中的目录 ``, container(容器)名为 ``. 
+ +## 通过共享密钥配置 + +### 根挂载 + +如果要使用 Azure Data Lake Storage 作为 Alluxio 根挂载点的 UFS,需要通过修改 `conf/alluxio-site.properties` 来配置 Alluxio,使其可访问底层存储系统。如果该配置文件不存在,可通过模板创建。 + +```console +$ cp conf/alluxio-site.properties.template conf/alluxio-site.properties +``` + +修改 `conf/alluxio-site.properties` 来指定 UFS 地址,需包括: + +```properties +alluxio.master.mount.table.root.ufs=abfs://@.dfs.core.windows.net// +``` + +通过在 `conf/alluxio-site.properties` 中添加以下属性来指定共享密钥: + +```properties +alluxio.master.mount.table.root.option.fs.azure.account.key..dfs.core.windows.net= +``` + +### 嵌套挂载 +Azure Data Lake 存储位置可以挂载在 Alluxio 命名空间中的嵌套目录下,以便统一访问多个底层存储系统。可使用 Alluxio 的 [Command Line Interface]({{ '/cn/operation/User-CLI.html' | relativize_url }})(命令行)来进行挂载。 + +```console +$ ./bin/alluxio fs mount \ + --option fs.azure.account.key..dfs.core.windows.net= \ + /mnt/abfs abfs://@.dfs.core.windows.net// +``` + +在完成这些修改之后,Alluxio 已经配置完毕,可以与底层存储 Azure Data Lake 一起在本地运行。 + +## 通过 OAuth 2.0 客户端凭证配置 + +### 根挂载 + +如果要使用 Azure Data Lake Storage 作为 Alluxio 根挂载点的 UFS,需要通过修改 `conf/alluxio-site.properties` 来配置 Alluxio,使其可访问底层存储系统。如果该配置文件不存在,可通过模板创建。 + +```console +$ cp conf/alluxio-site.properties.template conf/alluxio-site.properties +``` + +修改 `conf/alluxio-site.properties` 来指定 UFS 地址,需包括: + +```properties +alluxio.master.mount.table.root.ufs=abfs://@.dfs.core.windows.net// +``` + +通过在 `conf/alluxio-site.properties` 中添加以下属性来指定 OAuth 2.0 客户端凭证(注意 URL 的 Endpoint 应使用 V1 token Endpoint): + + +```properties +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.client.endpoint= +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.client.id= +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.client.secret= +``` + +### 嵌套挂载 + +Azure Data Lake 存储位置可以挂载在 Alluxio 命名空间中的嵌套目录下,以便统一访问多个底层存储系统。可使用 Alluxio 的 [Command Line Interface]({{ '/cn/operation/User-CLI.html' | relativize_url }})(命令行)来进行挂载。 + +```console +$ ./bin/alluxio fs mount \ + --option fs.azure.account.oauth2.client.endpoint= \ + 
--option fs.azure.account.oauth2.client.id= \ + --option fs.azure.account.oauth2.client.secret= \ + /mnt/abfs abfs://@.dfs.core.windows.net// +``` + +在完成这些修改之后,Alluxio 已经配置完毕,可以与底层存储 Azure Data Lake 一起在本地运行。 + +## 通过 Azure 托管身份服务配置 + +### 根挂载 + +如果要使用 Azure Data Lake Storage 作为 Alluxio 根挂载点的 UFS,需要通过修改 `conf/alluxio-site.properties` 来配置 Alluxio,使其可访问底层存储系统。如果该配置文件不存在,可通过模板创建。 +template. + +```console +$ cp conf/alluxio-site.properties.template conf/alluxio-site.properties +``` + +修改 `conf/alluxio-site.properties` 来指定UFS 地址,需包括: + +```properties +alluxio.master.mount.table.root.ufs=abfs://@.dfs.core.windows.net// +``` + +通过在 `conf/alluxio-site.properties` 中添加以下属性来指定 Azure 托管身份: + +```properties +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.msi.endpoint= +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.client.id= +alluxio.master.mount.table.root.option.fs.azure.account.oauth2.msi.tenant= +``` + +### 嵌套挂载 +Azure Data Lake 存储位置可以挂载在 Alluxio 命名空间中的嵌套目录下,以便统一访问多个底层存储系统。可使用 Alluxio 的 [Command Line Interface]({{ '/cn/operation/User-CLI.html' | relativize_url }})(命令行)来进行挂载。 + +```console +$ ./bin/alluxio fs mount \ + --option fs.azure.account.oauth2.msi.endpoint= \ + --option fs.azure.account.oauth2.client.id= \ + --option fs.azure.account.oauth2.msi.tenant= \ + /mnt/abfs abfs://@.dfs.core.windows.net// +``` + +在完成这些修改之后,Alluxio 已经配置完毕,可以与底层存储 Azure Data Lake 一起在本地运行。 + +## 将 Alluxio 与 Data Lake Storage 一起在本地运行 + +在本地启动Alluxio,检查是否一切运行正常。 + +```console +./bin/alluxio format +./bin/alluxio-start.sh local +``` + +该命令会启动一个 Alluxio master 和一个 Alluxio worker。可通过 [http://localhost:19999](http://localhost:19999) 查看 master UI。 + +运行一个简单的示例程序: + +```console +./bin/alluxio runTests +``` + +访问目录 ``,以验证 Alluxio 创建的文件和目录是否存在。就本次测试而言,将看到如下的文件: + +``` +/default_tests_files/BASIC_CACHE_PROMOTE_CACHE_THROUGH +``` + +要终止Alluxio, 可运行以下命令: + +```console +./bin/alluxio-stop.sh local +``` diff --git a/docs/cn/ufs/Azure-Data-Lake.md 
b/docs/cn/ufs/Azure-Data-Lake.md index b99b5b4c2ad8..38bdb7d41f2f 100644 --- a/docs/cn/ufs/Azure-Data-Lake.md +++ b/docs/cn/ufs/Azure-Data-Lake.md @@ -6,18 +6,14 @@ group: Storage Integrations priority: 2 --- -* Table of Contents -{:toc} +* Table of Contents {:toc} -本指南介绍如何配置Alluxio,使其与底层存储系统 [Azure Data Lake Storage Gen1](https://docs.microsoft.com/en-in/azure/data-lake-store/data-lake-store-overview) 一起运行。 +本指南介绍如何配置Alluxio,使其与底层存储系统 [Azure Data Lake Storage Gen1](https://learn.microsoft.com/zh-cn/azure/data-lake-store/data-lake-store-overview) 一起运行。 ## 部署条件 +电脑上应已安装好Alluxio程序。如果没有安装,可[编译Alluxio源代码]({{ '/cn/contributor/Building-Alluxio-From-Source.html' | relativize_url }}), 或直接[下载已编译好的Alluxio程序]({{ '/cn/deploy/Running-Alluxio-Locally.html' | relativize_url }}). -电脑上应已安装好Alluxio程序。如果没有安装,可[编译Alluxio源代码]({{ '/cn/contributor/Building-Alluxio-From-Source.html' | relativize_url }}), -或直接[下载已编译好的Alluxio程序]({{ '/cn/deploy/Running-Alluxio-Locally.html' | relativize_url }}). - -在将 Azure 数据湖存储与 Alluxio 一起运行前,请在 Azure 帐户中创建一个新的 Data Lake Storage 或使用现有的 Data Lake Storage。这里还应指定需使用的 directory(目录),创建一个新的 directory 或使用现有 directory 均可。此外,还需为存储帐户设置[服务到服务验证](https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory)。本指南中的 Azure 存储帐户名为 ``,该存储帐户中的 directory 名为 ``。 要了解有关 Azure 存储帐户的更多信息,请点击[此处](https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-get-started-portal). - +在将 Azure 数据湖存储与 Alluxio 一起运行前,请在 Azure 帐户中创建一个新的 Data Lake Storage 或使用现有的 Data Lake Storage。这里还应指定需使用的 directory(目录),创建一个新的 directory 或使用现有 directory 均可。此外,还需为存储帐户设置[服务到服务验证](https://learn.microsoft.com/zh-cn/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory)。本指南中的 Azure 存储帐户名为 ``,该存储帐户中的 directory 名为 ``。 要了解有关 Azure 存储帐户的更多信息,请点击[此处](https://learn.microsoft.com/zh-cn/azure/data-lake-store/data-lake-store-get-started-portal). 
## 基本设置 @@ -36,8 +32,9 @@ alluxio.master.mount.table.root.ufs=adl://.azuredatalakestore.net ``` 通过在 `conf/alluxio-site.properties`中添加以下属性,来指定用于根挂载点的 Azure 帐户的 Azure AD Application 的应用ID、身份验证密钥和租户 ID: -- 有关如何获取应用 ID 和身份验证密钥(也称为客户端密钥)的说明,请参见 [Get application ID and authentication key](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#get-tenant-and-app-id-values-for-signing-in)。 -- 有关如何获取租户ID 的说明,请参见 [Get tenant ID](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#get-tenant-and-app-id-values-for-signing-in). + +- 有关如何获取应用 ID 和身份验证密钥(也称为客户端密钥)的说明,请参见 [Get application ID and authentication key](https://learn.microsoft.com/zh-cn/azure/active-directory/develop/howto-create-service-principal-portal)。 +- 有关如何获取租户ID 的说明,请参见 [Get tenant ID](https://learn.microsoft.com/zh-cn/azure/active-directory/develop/howto-create-service-principal-portal). ```properties alluxio.master.mount.table.root.option.fs.adl.account..oauth2.client.id= @@ -61,17 +58,14 @@ $ ./bin/alluxio fs mount \ ## 将 Alluxio 与 Data Lake Storage 一起在本地运行 在本地启动Alluxio,检查是否一切运行正常。 - ```console ./bin/alluxio format ./bin/alluxio-start.sh local ``` -该命令会启动一个 Alluxio master 和一个 Alluxio worker。可通过 -[http://localhost:19999](http://localhost:19999) 查看 master UI。 +该命令会启动一个 Alluxio master 和一个 Alluxio worker。可通过 [http://localhost:19999](http://localhost:19999) 查看 master UI。 运行一个简单的示例程序: - ```console ./bin/alluxio runTests ``` @@ -83,7 +77,6 @@ $ ./bin/alluxio fs mount \ ``` 要终止Alluxio, 可运行以下命令: - ```console ./bin/alluxio-stop.sh local ``` From 33ad48150fb0096dec1e8f721f9af2e2fbc0a6b9 Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 8 Jun 2023 12:07:52 +0800 Subject: [PATCH 285/334] [DOCFIX] Update cn version of Azure-Data-Lake doc ### What changes are proposed in this pull request? [DOCFIX] Update cn version of Azure-Data-Lake doc ### Why are the changes needed? 
Chinese reader could more easily understand Azure Data Lake. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. pr-link: Alluxio/alluxio#16921 change-id: cid-3b03ba284873b8d99e789be9acde4fc98028993b From 9f5c74ea35be0264246636cf52a1631fc032164d Mon Sep 17 00:00:00 2001 From: Zihao Zhao Date: Thu, 8 Jun 2023 12:08:27 +0800 Subject: [PATCH 286/334] [DOCFIX] Update cn version of Deep-Leaning doc ### What changes are proposed in this pull request? [DOCFIX] Update cn version of Deep-Leaning doc ### Why are the changes needed? The Chinese solutions/Deep-Leaning doc is not updated with the latest changes, this PR synchronizes these updates. ### Does this PR introduce any user facing changes? Developers can get to know Alluxio in Chinese easily. pr-link: Alluxio/alluxio#16920 change-id: cid-c75d057724dd6cd9db3da4605acc7bd7c9ece702 --- docs/cn/solutions/Deep-Learning.md | 107 +++++------------------------ 1 file changed, 17 insertions(+), 90 deletions(-) diff --git a/docs/cn/solutions/Deep-Learning.md b/docs/cn/solutions/Deep-Learning.md index a9edc0ff4272..f35f5c1acad5 100644 --- a/docs/cn/solutions/Deep-Learning.md +++ b/docs/cn/solutions/Deep-Learning.md @@ -6,105 +6,32 @@ group: Compute Integrations priority: 4 --- -* 内容列表 +* Table of Contents {:toc} -{:toc} - -随着数据集规模的增大以及计算能力的增强,深度学习已成为人工智能的一项流行技术。能够获取的数据量的增多以及训练更大神经网络的处理能力的增强使得深度 -学习模型在多个领域内的性能正在获得持续的提升。深度学习的兴起促进了人工智能的最新发展,但也暴露出了其在访问数据和存储系统的一些问题。在本文中,我 -们将进一步描述深度学习的工作负载所带来的存储挑战,并展示Alluxio如何帮助解决这些挑战。 +随着数据集的增长和计算能力的增强,深度学习已经成为人工智能领域的流行技术。深度学习模型在各个方面的性能都在不断提高,可访问的数据量越来越大,并具备训练规模更大的神经网络的处理能力。深度学习的兴起推动了人工智能最新技术的发展,但也暴露了数据访问和存储系统中存在的一些难题。在此页面中,我们将进一步介绍深度学习工作负载面临的存储挑战,并介绍 Alluxio 如何帮助应对这些挑战。 ## 深度学习的数据挑战 +深度学习在机器学习中之所以流行是因为有大量可用数据,而更大的数据量通常会带来更好的性能。当然,不是所有的训练数据都能用于深度学习框架(Tensorflow、Caffe、torch)。例如,深度学习框架已经与一些现有的存​​储系统集成,但并非所有集成的存储都可用。数据的子集可能无法用于训练,从而导致训练性能和效果下降。 -由于能够获取到海量有用的数据,深度学习已成为机器学习的热门,因为通常更多的数据会带来更好的性能。然而,并不是所有存储系统上的训练数据都可直接用于 
-深度学习框架(Tensorflow,Caffe,torch)。例如,深度学习框架已经和一些已有的存储系统集成,但是并不是所有的存储系统集成都是可行的。因此,深度学 -习框架可能无法获取某个存储系统上的数据子集并进行训练,导致训练效率和效果的降低。 - -此外,随着分布式存储系统(HDFS,ceph)和云存储(AWS S3,Azure Blob Store,Google云存储)的流行,用户有了多种存储可供选择。然而,相较于简 -单地使用本地机器上的文件系统,从业者需要以不熟悉的方式与这些分布式和远程存储系统进行交互。正确配置每个存储系统和使用新的工具可能很困难,这使得使 -用深度学习框架访问不同存储系统获取数据变得困难。 - -最后,计算资源与存储资源分离的趋势导致远程存储系统的使用变得更为必要。这在云计算中很常见,使用远程存储系统可以实现资源按需分配,从而提高利 -用率,增加弹性和降低成本。但是,当深度学习需要使用远程存储系统的数据时,它必须通过网络获取,这可能会增加深度学习的训练时间。额外的网络IO会 -提高成本并增加处理数据的时间。 - -## Alluxio如何帮助解决深度学习的存储问题 - -Alluxio可以帮助解决深度学习的数据访问问题。Alluxio最简单的形式是一个虚拟文件系统,它透明地连接到现有的存储系统,并将它们作为一个单一 -的系统呈现给用户。使用Alluxio的[统一命名空间]({{ '/cn/core-services/Unified-Namespace.html' | relativize_url }}),可以将许多存储系统挂载到Alluxio中,包括S3, -Azure和GCS等云存储系统。由于Alluxio已经与存储系统集成了,因此深度学习框架只需与Alluxio进行交互即可访问所有存储中的数据。这为从任何数据源获得 -数据并进行训练打开了大门,从而可以提高深度学习学得的模型的性能。 - -Alluxio还包括一个可以提供便利和人性化的使用体验的FUSE界面。使用[Alluxio POSIX API]({{ '/cn/api/POSIX-API.html' | relativize_url }}),可以将Alluxio实 -例挂载到本地文件系统,因此与Alluxio的交互就跟与本地文件或者目录的交互一样简单。这使用户能够继续使用熟悉的工具和范例与其数据进行交互。Alluxio -可以连接到多个不同的存储系统,这意味着来自任何存储的任何数据看起来都跟本地文件或目录一样。 - -![Fuse]({{ site.baseurl }}/img/fuse.png) - -最后,Alluxio还提供常用数据的[本地缓存]({{ '/cn/core-services/Caching.html' | relativize_url }})。当数据远离计算时,这非常有用,例如存储环境中的计算分离。由于Alluxio可以 -在本地缓存数据,所以不需要通过网络IO来访问数据,从而使得深度学习训练的成本会更低,并且花费的时间会更少。 - -## 设置 Alluxio FUSE - -在本节中,我们将按照[FUSE部分](Mounting-Alluxio-FS-with-FUSE.html)中的说明设置FUSE,访问S3中ImageNet的训练数据,并允许深度学习框架 -通过FUSE访问数据。 - -首先在Alluxio的根目录下创建一个文件夹。 - -```console -$ ./bin/alluxio fs mkdir /training-data -``` - -然后我们可以把存储在S3桶中的ImageNet数据挂载到路径 `/training-data/imagenet`上。假定数据在s3中的路径是 `s3://alluxio-tensorflow-imagenet/`。 - -```console -$ ./bin/alluxio fs mount /training-data/imagenet/ \ -s3://alluxio-tensorflow-imagenet/ \ ---option s3a.accessKeyID= \ ---option s3a.secretKey= -``` - -请注意,此命令需要传递存储桶的S3证书。这些证书与挂载点相关联,这样之后的访问就不需要证书了。 - -之后,我们会启动Alluxio-FUSE进程。首先,我们创建一个名为 `/mnt/fuse` 的目录,把它的所有者改成当前的使用者(本文档中是ec2-user),并且设置 -权限为可读写。 - -```console -$ sudo mkdir -p /mnt/fuse -$ sudo chown 
ec2-user:ec2-user /mnt/fuse -$ chmod 664 /mnt/fuse -``` - -然后我们运行 Alluxio-FUSE shell 来将Alluxio目录下的 training-data 挂载到本地目录 `/mnt/fuse` 下面。 - -```console -$ ./integration/fuse/bin/alluxio-fuse mount /mnt/fuse /training-data -``` +计算资源与存储资源逐步分离的趋势使得远程存储系统成为必然。在云计算中使用远程存储系统很常见,可以实现按需资源分配,继而提高资源利用率、灵活性和弹性,并降低成本。当深度学习训练使用远程存储系统中的数据时,数据必须通过网络传输,由此增加深度学习的训练时间。额外的网络 I/O 也将增加成本和数据处理的时间。 -现在,你可以访问挂载目录并浏览其中的数据了,你应该能看到存储在云中的数据。 +## Alluxio 如何帮助解决深度学习的存储问题 +深度学习中存在一些与数据管理相关的问题,而 Alluxio 可以帮助应对数据访问的挑战。简单来说, Alluxio 是一个虚拟文件系统,它透明地连接到现有的存储系统,并将它们作为一个统一的系统呈现给用户。Alluxio通过[统一命名空间]({{ '/cn/core-services/Unified-Namespace.html' | relativize_url }}),可以将许多存储技术,包括 S3、Azure 和 GCS 等云存储挂载到 Alluxio 中。由于 Alluxio 已经实现与存储系统的集成,深度学习框架只需要与 Alluxio 交互就能够访问任何连接到 Alluxio 的存储中的数据。这样一来,来自任何数据源的数据都可以用于训练,因此可提升模型训练的性能。 -```console -$ cd /mnt/fuse -$ ls -``` +Alluxio 还包含一个 FUSE 接口,给用户带来便捷和熟悉的使用体验。Alluxio 实例可以通过 [Alluxio FUSE]({{ '/cn/api/POSIX-API.html' | relativize_url }}) 挂载到本地文件系统,因此与 Alluxio 交互就像与本地文件和目录交互一样简单。这使得用户能够继续使用熟悉的工具和范例来与其数据进行交互。由于 Alluxio 可以连接到多个不同的存储,因此用户可以像访问本地文件或目录一样访问任何存储中的数据。 -该文件夹已准备好供深度学习框架使用,深度学习框架将把Alluxio存储视为本地文件夹。我们将在下一节中使用此文件夹进行Tensorflow训练。 +![Fuse]({{ '/img/fuse.png' | relativize_url }}) -## 使用Tensorflow访问Alluxio FUSE +Alluxio 还可以为常用数据提供[本地缓存]({{ '/cn/core-services/Caching.html' | relativize_url }})。这一功能在数据离计算较远时尤其有用。由于 Alluxio 可以将数据缓存在本地,访问数据时不会产生网络I/O,因此可以让深度学习训练更经济高效,并且减少训练所需时间。 -在本文档中我们以深度学习框架Tensorflow为例,展示Alluxio如何帮助框架进行数据访问和管理。要通过Alluxio(Alluxio FUSE)访问S3中的训练数据, -我们可以简单地将`/mnt/fuse/imagenet`路径传递给基准脚本的参数`data_dir`[tf_cnn_benchmarsk.py](https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py)。 +## 在 Alluxio FUSE上 使用 Tensorflow +本页中我们以 Tensorflow 深度学习框架为例,来介绍Alluxio如何帮助进行数据访问和管理。我们将按照 [Alluxio Tensorflow]({{ '/cn/compute/Tensorflow.html' | relativize_url }}) 文档的说明在 Alluxio Fuse 上运行 Tensorflow 基准测试。 -一旦挂载完底层存储器,即可立即通过Alluxio访问各种底层存储器中的数据。并且各种数据可以透明的放入benchmark中而无需对Tensorflow或benchmark脚 -本进行任何修改。这大大简化了应用程序开发,否则需要整合并且配置每个特定的存储系统。 
+在将Alluxio一次性挂载到底层存储后,各类底层存储中的数据都可以通过Alluxio立即访问,并且基准测试也可以在不对Tensorflow或基准测试脚本进行修改的情况下透明地访问数据。这极大地简化了应用程序开发,否则应用程序开发将需要集成每个特定的存储系统并对访问凭证进行配置。 -除了提供统一的访问接口,Alluxio也可以带来性能上的好处。 -beanchmark通过输入的训练图像(单位为 图像数/秒)评价训练模型的吞吐量。 -训练过程涉及三个阶段,每个阶段使用不同的资源: -- 数据读取(I/O):从源中选择并且读取图像。 -- 图像处理(CPU):把图像记录解码成图像,预处理,然后组织成mini-batches。 -- 模型训练(GPU):在多个卷积层上计算并且更新参数。 +Alluxio还带来了性能上的优势。基准测试以图像/秒(images/sec.)为单位,根据输入训练图像评估训练模型的吞吐量。训练涉及各类资源利用的三个阶段: + - 数据读取 (I/O):从数据源中选择和读取图像文件。 + - 图像处理 (CPU):将图像记录解码为图像、预处理并将其分成小批次。 + - 模型训练 (GPU):计算和更新多个卷积层中的参数 -通过将Alluxio worker与深度学习框架搭配在一起,Alluxio将远程数据缓存到本地以供将来访问,从而提供数据本地性。没有Alluxio,缓慢的远程存储可能 -会导致I/O瓶颈,并使宝贵的GPU资源得不到利用。例如,在benchmark模型中,我们发现AlexNet架构相对简单,因此当存储变得更慢时,更容易出现I/O性能瓶 -颈。在一台EC2 p2.8xlarge机器上运行Alluxio可以带来近2倍的性能提升。 +通过将 Alluxio worker 与深度学习框架并置部署,Alluxio 可将远程数据缓存到本地供将来访问,从而实现数据本地化。如果没有Alluxio,缓慢的远程存储访问可能会导致 I/O 瓶颈并致使 GPU 资源无法得到充分利用。例如,在基准测试模型中,我们发现 AlexNet 的架构相对简单,因此当存储访问变慢时更容易导致 I/O 瓶颈。 Alluxio 在 EC2 p2.8xlarge 机器上可实现近 2 倍的性能提升。 From 0024a9ea836a3bc93c4c4f34e9661589e92339c7 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Fri, 9 Jun 2023 11:30:16 +0800 Subject: [PATCH 287/334] Fix some incorrect code in WorkerWebUILogs ### What changes are proposed in this pull request? The purpose of this pr is to fix some incorrect code in WorkerWebUILogs. ### Why are the changes needed? There is some incorrect code in WorkerWebUILogs, we should fix them. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17052 change-id: cid-47d099e6705ded625466492faa846a3ae6232cf1 --- core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java b/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java index 7120ab1ca794..e31258128f70 100644 --- a/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java +++ b/core/common/src/main/java/alluxio/wire/WorkerWebUILogs.java @@ -203,7 +203,7 @@ public WorkerWebUILogs setViewingOffset(long viewingOffset) { @Override public String toString() { - return MoreObjects.toStringHelper(this).add("currentPath", mCurrentPath).add("cebug", mDebug) + return MoreObjects.toStringHelper(this).add("currentPath", mCurrentPath).add("debug", mDebug) .add("fatalError", mFatalError).add("fileData", mFileData).add("fileInfos", mFileInfos) .add("invalidPathError", mInvalidPathError).add("nTotalFile", mNTotalFile) .add("viewingOffset", mViewingOffset).toString(); From fbc01858752a9b73c5f0e238028edcb3154a8c46 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Fri, 9 Jun 2023 12:11:52 +0800 Subject: [PATCH 288/334] Add some logs when the standby master send a heartbeat request ### What changes are proposed in this pull request? When the standby master send a heartbeat request to the leader master, many times there is no trace. When necessary, we should record their correspondence. ### Why are the changes needed? Adding some logs is necessary, here are some reasons: 1. When the master node fails, logs can help troubleshoot the cause. 2. It can make the communication between the standby master and the leader master clearer. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17204 change-id: cid-ec012f3a6736d030f9f15a141254df8cf77d861b --- .../src/main/java/alluxio/master/meta/DefaultMetaMaster.java | 1 + .../src/main/java/alluxio/master/meta/MetaMasterSync.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 49c30b276f4b..63ad27fd1488 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -627,6 +627,7 @@ public boolean isInSafeMode() { @Override public MetaCommand masterHeartbeat(long masterId, MasterHeartbeatPOptions options) { + LOG.debug("A heartbeat request was received from Standby master: {}.", masterId); MasterInfo master = mMasters.getFirstByField(ID_INDEX, masterId); if (master == null) { LOG.warn("Could not find master id: {} for heartbeat.", masterId); diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java index 3b246cefae15..eb77cc181b5d 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java @@ -68,6 +68,8 @@ public void heartbeat(long timeLimitMs) { if (mMasterId.get() == UNINITIALIZED_MASTER_ID) { setIdAndRegister(); } + LOG.debug("Standby master: {} send a heartbeat request to the leader master.", + mMasterId.get()); command = mMasterClient.heartbeat(mMasterId.get()); handleCommand(command); } catch (IOException e) { From dcde424a9321c09a47ed9ec4728d60c36d3adec9 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Fri, 9 Jun 2023 12:12:33 +0800 Subject: [PATCH 289/334] [DOCFIX]Improve annotations in DefaultMetaMaster add some @links ### What changes are proposed in this pull request? 
The purpose of this pr is to improve the comments related to DefaultMetaMaster and add some @links. ### Why are the changes needed? In DefaultMetaMaster, some of the documentation descriptions are not clear enough, and we should fix them as much as possible. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17202 change-id: cid-3ca5c75c9374dfd5e92be806d115811a737c3aff --- .../src/main/java/alluxio/master/meta/DefaultMetaMaster.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 63ad27fd1488..57ad77dc2e2c 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -156,7 +156,7 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { /** Path level properties. */ private final PathProperties mPathProperties; - /** Persisted state for MetaMaster. */ + /** Persisted state for {@link MetaMaster}. */ private final State mState; /** Value to be used for the cluster ID when not assigned. */ @@ -169,7 +169,7 @@ public final class DefaultMetaMaster extends CoreMaster implements MetaMaster { private final JournalSpaceMonitor mJournalSpaceMonitor; /** - * Journaled state for MetaMaster. + * Journaled state for {@link MetaMaster}. */ @NotThreadSafe public static final class State implements alluxio.master.journal.Journaled { From a84b6e611b118ef4fb8cd4da00cddf4c995f0def Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Sat, 10 Jun 2023 00:49:21 +0800 Subject: [PATCH 290/334] Bump ratis version to 2.5.1 ### What changes are proposed in this pull request? 
Bump ratis version to 2.5.1 and fix API changes. ### Why are the changes needed? Ratis 2.5.1 is released and brings several improvements and bug fixes. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#17394 change-id: cid-6a774735fdb86b0fd35098dcc26cd9f698707783 --- core/server/common/pom.xml | 2 +- .../master/journal/raft/RaftJournalUtils.java | 19 --------- .../journal/tool/RaftJournalDumper.java | 5 ++- .../master/journal/raft/RaftJournalTest.java | 42 +++---------------- ...dJournalIntegrationTestFaultTolerance.java | 11 +++-- 5 files changed, 15 insertions(+), 64 deletions(-) diff --git a/core/server/common/pom.xml b/core/server/common/pom.xml index ec5b82504477..db06bedd7966 100644 --- a/core/server/common/pom.xml +++ b/core/server/common/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.parent.basedir}/build - 2.4.1 + 2.5.1 diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java index ae46016840f4..6665702eb699 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java @@ -12,10 +12,8 @@ package alluxio.master.journal.raft; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import java.io.File; -import java.io.IOException; import java.net.InetSocketAddress; import java.util.concurrent.CompletableFuture; @@ -60,23 +58,6 @@ public static File getRaftJournalDir(File baseDir) { return new File(baseDir, RAFT_DIR); } - /** - * Creates a temporary snapshot file. 
- * - * @param storage the snapshot storage - * @return the temporary snapshot file - * @throws IOException if error occurred while creating the snapshot file - */ - public static File createTempSnapshotFile(SimpleStateMachineStorage storage) throws IOException { - File tempDir = new File(storage.getSmDir().getParentFile(), "tmp"); - if (!tempDir.isDirectory() && !tempDir.mkdir()) { - throw new IOException( - "Cannot create temporary snapshot directory at " + tempDir.getAbsolutePath()); - } - return File.createTempFile("raft_snapshot_" + System.currentTimeMillis() + "_", - ".dat", tempDir); - } - /** * Creates a future that is completed exceptionally. * diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java index 974f5ac7d305..780020c23034 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java @@ -30,6 +30,7 @@ import org.apache.ratis.statemachine.SnapshotInfo; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.util.MD5FileUtil; +import org.apache.ratis.util.SizeInBytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,8 +96,8 @@ private void readRatisLogFromDir() { List paths = LogSegmentPath.getLogSegmentPaths(storage); for (LogSegmentPath path : paths) { final int entryCount = LogSegment.readSegmentFile(path.getPath().toFile(), - path.getStartEnd(), RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, - null, (proto) -> { + path.getStartEnd(), SizeInBytes.valueOf(Integer.MAX_VALUE), + RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, null, (proto) -> { if (proto.hasStateMachineLogEntry()) { try { Journal.JournalEntry entry = Journal.JournalEntry.parseFrom( diff --git 
a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java index d51b1bfb0d6a..4871b97218ac 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java @@ -13,6 +13,7 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; +import alluxio.grpc.NetAddress; import alluxio.grpc.QuorumServerInfo; import alluxio.master.NoopMaster; import alluxio.master.StateLockManager; @@ -24,9 +25,7 @@ import alluxio.util.CommonUtils; import alluxio.util.WaitForOptions; -import com.google.common.annotations.VisibleForTesting; import org.apache.ratis.conf.RaftProperties; -import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.junit.After; import org.junit.Assert; @@ -36,7 +35,6 @@ import org.junit.rules.TemporaryFolder; import org.junit.rules.Timeout; -import java.lang.reflect.Method; import java.net.InetSocketAddress; import java.net.ServerSocket; import java.util.ArrayList; @@ -400,8 +398,11 @@ private void promoteFollower() throws Exception { Assert.assertTrue(mLeaderJournalSystem.isLeader()); Assert.assertFalse(mFollowerJournalSystem.isLeader()); // Triggering rigged election via reflection to switch the leader. 
- changeToFollower(mLeaderJournalSystem); - changeToCandidate(mFollowerJournalSystem); + NetAddress followerAddress = + mLeaderJournalSystem.getQuorumServerInfoList().stream() + .filter(info -> !info.getIsLeader()).findFirst() + .map(QuorumServerInfo::getServerAddress).get(); + mLeaderJournalSystem.transferLeadership(followerAddress); CommonUtils.waitFor("follower becomes leader", () -> mFollowerJournalSystem.isLeader(), mWaitOptions); Assert.assertFalse(mLeaderJournalSystem.isLeader()); @@ -580,37 +581,6 @@ private List startJournalCluster(List jour return journalSystems; } - @VisibleForTesting - void changeToCandidate(RaftJournalSystem journalSystem) throws Exception { - RaftServer.Division serverImpl = journalSystem.getRaftServer() - .getDivision(RaftJournalSystem.RAFT_GROUP_ID); - Class raftServerImpl = (Class.forName("org.apache.ratis.server.impl.RaftServerImpl")); - Method method = raftServerImpl.getDeclaredMethod("changeToCandidate", boolean.class); - method.setAccessible(true); - method.invoke(serverImpl, true); - } - - @VisibleForTesting - void changeToFollower(RaftJournalSystem journalSystem) throws Exception { - RaftServer.Division serverImplObj = journalSystem.getRaftServer() - .getDivision(RaftJournalSystem.RAFT_GROUP_ID); - Class raftServerImplClass = Class.forName("org.apache.ratis.server.impl.RaftServerImpl"); - - Method getStateMethod = raftServerImplClass.getDeclaredMethod("getState"); - getStateMethod.setAccessible(true); - Object serverStateObj = getStateMethod.invoke(serverImplObj); - Class serverStateClass = Class.forName("org.apache.ratis.server.impl.ServerState"); - Method getCurrentTermMethod = serverStateClass.getDeclaredMethod("getCurrentTerm"); - getCurrentTermMethod.setAccessible(true); - long currentTermObj = (long) getCurrentTermMethod.invoke(serverStateObj); - - Method changeToFollowerMethod = raftServerImplClass.getDeclaredMethod("changeToFollower", - long.class, boolean.class, boolean.class, Object.class); - - 
changeToFollowerMethod.setAccessible(true); - changeToFollowerMethod.invoke(serverImplObj, currentTermObj, true, false, "test"); - } - /** * @return a list of free ports */ diff --git a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java index 86c5c369a834..3fa21cd85745 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java @@ -39,8 +39,7 @@ import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.StorageImplUtils; -import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; -import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; +import org.apache.ratis.statemachine.SnapshotInfo; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -176,9 +175,9 @@ public void copySnapshotToMaster() throws Exception { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); rs.initialize(); - SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); + SnapshotDirStateMachineStorage storage = new SnapshotDirStateMachineStorage(); storage.init(rs); - SingleFileSnapshotInfo snapshot = storage.findLatestSnapshot(); + SnapshotInfo snapshot = storage.getLatestSnapshot(); assertNotNull(snapshot); mCluster.notifySuccess(); } @@ -223,9 +222,9 @@ public void copySnapshotToFollower() throws Exception { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); rs.initialize(); - SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); + SnapshotDirStateMachineStorage storage = new SnapshotDirStateMachineStorage(); storage.init(rs); - 
SingleFileSnapshotInfo snapshot = storage.findLatestSnapshot(); + SnapshotInfo snapshot = storage.getLatestSnapshot(); assertNotNull(snapshot); mCluster.notifySuccess(); } From 06ffdd5ebfc14b087bf367042a9f85e4de9a3033 Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Sat, 10 Jun 2023 02:03:38 +0800 Subject: [PATCH 291/334] [DOCFIX] Remove table operations from user CLI docs ### What changes are proposed in this pull request? Remove table operations from user operation docs. ### Why are the changes needed? The SDS (table) service is deprecated. The link to `#table-operations` is dead. ### Does this PR introduce any user facing changes? Yes. pr-link: Alluxio/alluxio#17581 change-id: cid-ea608c71ef24c22da3399ce744c6937ccbc7eb45 --- docs/en/operation/User-CLI.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index efbcfa8cdd59..e3c2067d2aaf 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -488,10 +488,6 @@ Supplying neither flag will default to generating both docs. > Note: This command does not require the Alluxio cluster to be running. -### table - -See [Table Operations](#table-operations). - ### version The `version` command prints Alluxio version. From 87ddfd4af076a67c7011229132d44938c033c379 Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Mon, 12 Jun 2023 21:13:42 +0800 Subject: [PATCH 292/334] Fix corrupted block causing reads to fail ### What changes are proposed in this pull request? Fix read failure when a mismatch occurs between the block size recorded in memory by `BlockMeta` and the length of the actual physical block file. When such a mismatch is detected, the block is removed from worker storage, and worker falls back to reading from UFS. ### Why are the changes needed? This causes reading the block to fail. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#17564 change-id: cid-2758ff97e5016c1aae7ededb244e919233ae6d3b --- .../BlockDoesNotExistRuntimeException.java | 12 +- .../main/java/alluxio/util/io/FileUtils.java | 19 ++ .../alluxio/worker/block/MonoBlockStore.java | 14 +- .../worker/block/TieredBlockStore.java | 87 +++++- .../grpc/ShortCircuitBlockReadHandler.java | 8 + ...edStoreBlockCorruptionIntegrationTest.java | 268 ++++++++++++++++++ 6 files changed, 400 insertions(+), 8 deletions(-) create mode 100644 tests/src/test/java/alluxio/server/tieredstore/TieredStoreBlockCorruptionIntegrationTest.java diff --git a/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java b/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java index 5eef0d297afa..ce7aef647412 100644 --- a/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java +++ b/core/common/src/main/java/alluxio/exception/runtime/BlockDoesNotExistRuntimeException.java @@ -19,11 +19,21 @@ public class BlockDoesNotExistRuntimeException extends NotFoundRuntimeException { /** - * Constructs a new exception with the specified detail message and cause. + * Constructs a new exception with the specified block ID. * * @param blockId block id */ public BlockDoesNotExistRuntimeException(long blockId) { super(MessageFormat.format("BlockMeta not found for blockId {0,number,#}", blockId)); } + + /** + * Constructs a new exception with the specified block ID and cause. 
+ * + * @param blockId block id + * @param cause why the block is not found + */ + public BlockDoesNotExistRuntimeException(long blockId, Throwable cause) { + super(MessageFormat.format("Block {0,number,#} not found", blockId), cause); + } } diff --git a/core/common/src/main/java/alluxio/util/io/FileUtils.java b/core/common/src/main/java/alluxio/util/io/FileUtils.java index 98ffc7dabb4c..ca55fbcc873a 100644 --- a/core/common/src/main/java/alluxio/util/io/FileUtils.java +++ b/core/common/src/main/java/alluxio/util/io/FileUtils.java @@ -269,6 +269,25 @@ public static void delete(String path) { } } + /** + * Deletes the file or directory, if it exists. + * + * @param path pathname string of file or directory + */ + public static void deleteIfExists(String path) { + try { + Files.deleteIfExists(Paths.get(path)); + } catch (java.nio.file.InvalidPathException e) { + throw new InvalidArgumentRuntimeException(e); + } catch (DirectoryNotEmptyException e) { + throw new FailedPreconditionRuntimeException(e); + } catch (SecurityException e) { + throw new PermissionDeniedRuntimeException(e); + } catch (IOException e) { + throw new UnknownRuntimeException(e); + } + } + /** * Deletes a file or a directory, recursively if it is a directory. 
* diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 3b2f4a17e568..5f18e4cac897 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -159,19 +159,23 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset, boolean positionShort, Protocol.OpenUfsBlockOptions options) throws IOException { BlockReader reader; - Optional blockMeta = mLocalBlockStore.getVolatileBlockMeta(blockId); - if (blockMeta.isPresent()) { + // first try reading from Alluxio cache + try { reader = mLocalBlockStore.createBlockReader(sessionId, blockId, offset); DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); - } else { + return reader; + } catch (BlockDoesNotExistRuntimeException e) { + LOG.debug("Block {} does not exist in Alluxio cache: {}", blockId, e.getMessage()); + // the block does not exist in Alluxio, try loading from UFS boolean checkUfs = options != null && (options.hasUfsPath() || options.getBlockInUfsTier()); if (!checkUfs) { - throw new BlockDoesNotExistRuntimeException(blockId); + throw e; } // When the block does not exist in Alluxio but exists in UFS, try to open the UFS block. 
reader = createUfsBlockReader(sessionId, blockId, offset, positionShort, options); + DefaultBlockWorker.Metrics.WORKER_ACTIVE_CLIENTS.inc(); + return reader; } - return reader; } @Override diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java index d4c0d3886bb9..855203f9859d 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java @@ -48,6 +48,11 @@ import java.io.Closeable; import java.io.IOException; import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; import java.text.MessageFormat; import java.util.Collections; import java.util.HashSet; @@ -188,8 +193,35 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset) blockLock.close(); throw new BlockDoesNotExistRuntimeException(blockId); } + BlockMeta block = blockMeta.get(); try { - BlockReader reader = new StoreBlockReader(sessionId, blockMeta.get()); + validateBlockIntegrityForRead(block); + } catch (IllegalStateException validationError) { + LOG.warn("Block {} is corrupted, removing it: {}", + blockId, validationError.getMessage()); + // in case of a corrupted block, remove it and propagate the exception + // release the read lock because removeBlockInternal needs a write lock on the same block + blockLock.close(); + // at this point we are not holding any lock, so two threads may attempt to remove the same + // block concurrently. This is fine as long as removeBlockInternal is no-op for a + // non-existing block. 
+ try { + removeBlockInternal(sessionId, blockId, REMOVE_BLOCK_TIMEOUT_MS); + for (BlockStoreEventListener listener : mBlockStoreEventListeners) { + synchronized (listener) { + listener.onRemoveBlockByWorker(blockId); + listener.onRemoveBlock(blockId, block.getBlockLocation()); + } + } + } catch (Exception removeBlockError) { + LOG.warn("Failed to remove a corrupted block {}", blockId, removeBlockError); + validationError.addSuppressed(removeBlockError); + } + throw new BlockDoesNotExistRuntimeException(blockId, validationError); + } + + try { + BlockReader reader = new StoreBlockReader(sessionId, block); ((FileChannel) reader.getChannel()).position(offset); accessBlock(sessionId, blockId); return new DelegatingBlockReader(reader, blockLock); @@ -200,6 +232,57 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset) } } + /** + * Validates the integrity of the block for reading: + * 1. the block file should exist + * 2. the length of the block file should match its BlockMeta + * If any of the above does not hold, this can be a result of corrupted block files + * due to faulty storage hardware, manual manipulation of the block files by admin, + * or a bug where the block was pre-maturely committed when it was not done writing. 
+ * + * @param blockMeta the block meta acquired from the metadata manager + * @throws IllegalStateException if the block is deemed corrupted + */ + public static void validateBlockIntegrityForRead(BlockMeta blockMeta) + throws IllegalStateException { + final long blockId = blockMeta.getBlockId(); + final Path blockPath = Paths.get(blockMeta.getPath()); + final BasicFileAttributes blockFileAttrs; + try { + blockFileAttrs = Files.readAttributes(blockPath, BasicFileAttributes.class); + } catch (NoSuchFileException e) { + throw new IllegalStateException(String.format( + "Block %s exists in block meta but actual physical block file %s does not exist", + blockId, blockPath)); + } catch (IOException e) { + // cannot read file attributes, possibly due to bad permission or bad file type + LOG.debug("Cannot read file attributes for block {}", blockId, e); + throw new IllegalStateException(String.format( + "Cannot read attributes of file %s for block %s during validation", blockPath, blockId)); + } + // need to check if file is a regular file, as for directories and device files the file length + // is unspecified + if (!blockFileAttrs.isRegularFile()) { + throw new IllegalStateException(String.format( + "Block file %s for block %s is not a regular file", blockPath, blockId)); + } + final long actualLength = blockFileAttrs.size(); + final long expectedLength = blockMeta.getBlockSize(); + // check if the actual file length matches the expected length from block meta + if (actualLength != expectedLength) { + LOG.debug("Block {} is expected to be {} bytes, " + + "but the actual block file length is {}", blockId, expectedLength, actualLength); + // Note: we only error out on 0-sized blocks which are definitely not correct + // but if the size is not 0, we treat it as valid + if (actualLength == 0) { + throw new IllegalStateException(String.format( + "Block %s exists in block meta but the size from block meta does not match that of " + + "the block file %s, expected block size = 
%d, actual block file length = %d", + blockId, blockPath, expectedLength, actualLength)); + } + } + } + @Override public TempBlockMeta createBlock(long sessionId, long blockId, AllocateOptions options) { LOG.debug("createBlock: sessionId={}, blockId={}, options={}", sessionId, blockId, options); @@ -820,7 +903,7 @@ private MoveBlockResult moveBlockInternal(long sessionId, long blockId, * @param blockMeta block metadata */ private void removeBlockFileAndMeta(BlockMeta blockMeta) { - FileUtils.delete(blockMeta.getPath()); + FileUtils.deleteIfExists(blockMeta.getPath()); mMetaManager.removeBlockMeta(blockMeta); } diff --git a/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java b/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java index 4b7608fab229..6988c5aa63cf 100644 --- a/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/grpc/ShortCircuitBlockReadHandler.java @@ -27,6 +27,7 @@ import alluxio.worker.block.BlockStore; import alluxio.worker.block.BlockStoreLocation; import alluxio.worker.block.DefaultBlockWorker; +import alluxio.worker.block.TieredBlockStore; import alluxio.worker.block.meta.BlockMeta; import io.grpc.stub.StreamObserver; @@ -86,6 +87,13 @@ public OpenLocalBlockResponse call() throws Exception { if (!meta.isPresent()) { throw new BlockDoesNotExistRuntimeException(mRequest.getBlockId()); } + try { + // assuming the underlying BlockStore is TieredBlockStore, as it's the only impl + // that allows short-circuit read + TieredBlockStore.validateBlockIntegrityForRead(meta.get()); + } catch (IllegalStateException validationError) { + throw new BlockDoesNotExistRuntimeException(mRequest.getBlockId(), validationError); + } if (mRequest.getPromote()) { // TODO(calvin): Move this logic into BlockStore#moveBlockInternal if possible // Because the move operation is expensive, we first check if the operation 
is necessary diff --git a/tests/src/test/java/alluxio/server/tieredstore/TieredStoreBlockCorruptionIntegrationTest.java b/tests/src/test/java/alluxio/server/tieredstore/TieredStoreBlockCorruptionIntegrationTest.java new file mode 100644 index 000000000000..29890d12b1fd --- /dev/null +++ b/tests/src/test/java/alluxio/server/tieredstore/TieredStoreBlockCorruptionIntegrationTest.java @@ -0,0 +1,268 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.server.tieredstore; + +import alluxio.AlluxioURI; +import alluxio.Constants; +import alluxio.client.block.stream.LocalFileDataReader; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.FileSystemContext; +import alluxio.client.file.FileSystemTestUtils; +import alluxio.client.file.URIStatus; +import alluxio.client.file.options.InStreamOptions; +import alluxio.conf.PropertyKey; +import alluxio.exception.status.NotFoundException; +import alluxio.grpc.ReadPType; +import alluxio.grpc.WritePType; +import alluxio.testutils.BaseIntegrationTest; +import alluxio.testutils.LocalAlluxioClusterResource; +import alluxio.util.FileSystemOptionsUtils; +import alluxio.util.io.BufferUtils; +import alluxio.wire.BlockInfo; +import alluxio.wire.FileBlockInfo; +import alluxio.worker.block.BlockWorker; +import alluxio.worker.block.meta.BlockMeta; + +import com.google.common.io.ByteStreams; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import 
org.junit.rules.TemporaryFolder; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Optional; + +/** + * Tests that when a corrupt block is being read, tiered store will remove this block + * from both block meta and the physical block file, and falls back to read it from the UFS. + * A corrupt block is defined as: + * 1. its block meta exists in memory, but no physical block file exist in the cache directory, or + * 2. its length in block meta is non-zero, but the physical block file is 0-sized. + */ +public class TieredStoreBlockCorruptionIntegrationTest extends BaseIntegrationTest { + private static final int MEM_CAPACITY_BYTES = 1000; + private static final int BLOCK_SIZE = 100; + private final AlluxioURI mFile = new AlluxioURI("/file1"); + private final int mFileLength = 3 * 100; // 3 blocks, 100 bytes each + private BlockWorker mWorker; + private FileSystem mFileSystem; + + @Rule + public TemporaryFolder mTempFolder = new TemporaryFolder(); + + @Rule + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = + new LocalAlluxioClusterResource.Builder() + .setProperty(PropertyKey.WORKER_RAMDISK_SIZE, MEM_CAPACITY_BYTES) + .setProperty(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT, BLOCK_SIZE) + .setProperty(PropertyKey.USER_FILE_BUFFER_BYTES, 100) + .setProperty(PropertyKey.WORKER_TIERED_STORE_LEVEL0_HIGH_WATERMARK_RATIO, 0.8) + .setProperty(PropertyKey.USER_FILE_RESERVED_BYTES, 100) + .setProperty(PropertyKey.WORKER_MANAGEMENT_TIER_ALIGN_ENABLED, false) + .setProperty(PropertyKey.WORKER_REVIEWER_CLASS, + "alluxio.worker.block.reviewer.AcceptingReviewer") + .build(); + + @Before + public final void before() throws Exception { + mFileSystem = mLocalAlluxioClusterResource.get().getClient(); + mWorker = mLocalAlluxioClusterResource.get() + .getWorkerProcess() + 
.getWorker(BlockWorker.class); + prepareFileWithCorruptBlocks(); + } + + @Test + @LocalAlluxioClusterResource.Config(confParams = { + PropertyKey.Name.USER_SHORT_CIRCUIT_ENABLED, "false", + }) + public void sequentialRead() throws Exception { + verifySequentialReadable(); + verifyBlockMetadata(); + } + + @Test + @LocalAlluxioClusterResource.Config(confParams = { + PropertyKey.Name.USER_SHORT_CIRCUIT_ENABLED, "false", + }) + public void positionedRead() throws Exception { + verifyPositionedReadable(); + verifyBlockMetadata(); + } + + @Test + @LocalAlluxioClusterResource.Config(confParams = { + PropertyKey.Name.USER_SHORT_CIRCUIT_ENABLED, "true", + }) + public void shortCircuitRead() throws Exception { + // verify that the block cannot be read via short-circuit + try (FileSystemContext fsContext = FileSystemContext.create()) { + URIStatus fileStatus = mFileSystem.getStatus(mFile); + List blocks = fileStatus.getFileBlockInfos(); + InStreamOptions inStreamOptions = new InStreamOptions(fileStatus, + fsContext.getClusterConf()); + Assert.assertThrows(NotFoundException.class, () -> new LocalFileDataReader.Factory( + fsContext, mWorker.getWorkerAddress(), blocks.get(0).getBlockInfo().getBlockId(), + Constants.KB, inStreamOptions)); + } + + verifySequentialReadable(); + verifyBlockMetadata(); + } + + /** + * Prepares a 3-block file, truncates the first block to 0 size, removes the second block, and + * leaves the third block intact. 
+ */ + private void prepareFileWithCorruptBlocks() throws Exception { + BlockWorker worker = mLocalAlluxioClusterResource.get() + .getWorkerProcess() + .getWorker(BlockWorker.class); + FileSystemTestUtils.createByteFile(mFileSystem, mFile, WritePType.CACHE_THROUGH, mFileLength); + URIStatus fileStatus = mFileSystem.getStatus(mFile); + Path ufsFilePath = Paths.get(fileStatus.getFileInfo().getUfsPath()); + Assert.assertTrue(Files.exists(ufsFilePath)); + Assert.assertEquals(mFileLength, Files.size(ufsFilePath)); + + List blocks = fileStatus.getFileBlockInfos(); + Assert.assertEquals(3, blocks.size()); + Assert.assertTrue(blocks.get(0).getBlockInfo().getLocations().size() >= 1); + Assert.assertTrue(blocks.get(1).getBlockInfo().getLocations().size() >= 1); + Assert.assertTrue(blocks.get(2).getBlockInfo().getLocations().size() >= 1); + + // truncate the first block on disk, bypassing worker management to simulate block corruption + Optional firstBlockMeta = + worker.getBlockStore().getVolatileBlockMeta(blocks.get(0).getBlockInfo().getBlockId()); + Assert.assertTrue( + String.format("Block meta of first block does not exist on worker %s", + worker.getWorkerAddress()), firstBlockMeta.isPresent()); + Path blockFilePath = Paths.get(firstBlockMeta.get().getPath()); + Files.write(blockFilePath, new byte[0], StandardOpenOption.TRUNCATE_EXISTING); + Assert.assertTrue(Files.exists(blockFilePath)); + Assert.assertEquals(0, Files.size(blockFilePath)); + + // remove the second block file + Optional secondBlockMeta = + worker.getBlockStore().getVolatileBlockMeta(blocks.get(1).getBlockInfo().getBlockId()); + Assert.assertTrue( + String.format("Block meta of second block does not exist on worker %s", + worker.getWorkerAddress()), secondBlockMeta.isPresent()); + blockFilePath = Paths.get(secondBlockMeta.get().getPath()); + Files.deleteIfExists(blockFilePath); + Assert.assertFalse(Files.exists(blockFilePath)); + + Optional thirdBlockMeta = + 
worker.getBlockStore().getVolatileBlockMeta(blocks.get(2).getBlockInfo().getBlockId()); + Assert.assertTrue( + String.format("Block meta of third block does not exist on worker %s", + worker.getWorkerAddress()), thirdBlockMeta.isPresent()); + blockFilePath = Paths.get(thirdBlockMeta.get().getPath()); + Assert.assertTrue(Files.exists(blockFilePath)); + Assert.assertEquals(thirdBlockMeta.get().getBlockSize(), Files.size(blockFilePath)); + } + + private void verifySequentialReadable() throws Exception { + URIStatus fileStatus = mFileSystem.getStatus(mFile); + try (FileInStream is = mFileSystem.openFile( + fileStatus, + FileSystemOptionsUtils.openFileDefaults( + mLocalAlluxioClusterResource.get().getClient().getConf()) + .toBuilder() + .setReadType(ReadPType.NO_CACHE) // don't cache the corrupt block + .build())) { + byte[] fileContent = ByteStreams.toByteArray(is); + Assert.assertTrue( + BufferUtils.equalIncreasingByteArray(mFileLength, fileContent)); + } + } + + private void verifyPositionedReadable() throws Exception { + URIStatus fileStatus = mFileSystem.getStatus(mFile); + try (FileInStream is = mFileSystem.openFile( + fileStatus, + FileSystemOptionsUtils.openFileDefaults( + mLocalAlluxioClusterResource.get().getClient().getConf()) + .toBuilder() + .setReadType(ReadPType.NO_CACHE) // don't cache the corrupt block + .build())) { + final long startPos = 0; + int totalBytesRead = 0; + byte[] buffer = new byte[Constants.KB]; + int bytesRead = is.positionedRead(startPos, buffer, totalBytesRead, buffer.length); + while (bytesRead != -1) { + totalBytesRead += bytesRead; + Assert.assertTrue(totalBytesRead <= mFileLength); + bytesRead = is.positionedRead( + startPos + totalBytesRead, buffer, totalBytesRead, buffer.length); + } + byte[] fileContent = Arrays.copyOfRange(buffer, 0, totalBytesRead); + Assert.assertTrue( + BufferUtils.equalIncreasingByteArray((int) startPos, totalBytesRead, fileContent)); + } + } + + /** + * Verifies that after corrupt blocks are detected and 
removed by tiered store, the first + * two blocks are not cached by the worker, and their block location info from master does not + * contain the worker. + */ + private void verifyBlockMetadata() throws Exception { + URIStatus fileStatus = mFileSystem.getStatus(mFile); + List blocks = fileStatus.getFileBlockInfos(); + Assert.assertEquals(3, blocks.size()); + // verify that block meta are correct in block worker + Assert.assertFalse(mWorker + .getBlockStore() + .getVolatileBlockMeta(blocks.get(0).getBlockInfo().getBlockId()) + .isPresent()); + Assert.assertFalse(mWorker + .getBlockStore() + .getVolatileBlockMeta(blocks.get(1).getBlockInfo().getBlockId()) + .isPresent()); + Assert.assertTrue(mWorker + .getBlockStore() + .getVolatileBlockMeta(blocks.get(2).getBlockInfo().getBlockId()) + .isPresent()); + + // verify that the block location info has been updated in master after worker-master sync + Thread.sleep(mLocalAlluxioClusterResource.get() + .getClient() + .getConf() + .getMs(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS) * 2); + // retrieve latest block location info + fileStatus = mFileSystem.getStatus(mFile); + blocks = fileStatus.getFileBlockInfos(); + Assert.assertEquals(3, blocks.size()); + Assert.assertTrue(blocks + .stream() + .limit(2) + .map(FileBlockInfo::getBlockInfo) + .map(BlockInfo::getLocations) + .flatMap(Collection::stream) + .noneMatch(loc -> loc.getWorkerAddress().equals(mWorker.getWorkerAddress()))); + Assert.assertTrue(blocks + .stream() + .skip(2) + .map(FileBlockInfo::getBlockInfo) + .map(BlockInfo::getLocations) + .flatMap(Collection::stream) + .anyMatch(loc -> loc.getWorkerAddress().equals(mWorker.getWorkerAddress()))); + } +} From 73e20cad10b652dbcd305b2dcb1b27f32ebaf75a Mon Sep 17 00:00:00 2001 From: Liu Ziyang <55306596+lzy-a@users.noreply.github.com> Date: Wed, 14 Jun 2023 11:50:03 +0800 Subject: [PATCH 293/334] [DOCFIX] Improve desc on write type THROUGH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit ### Summary In Section 6.4 Write to UFS Only (THROUGH), write completion and persistence are described in the wrong order. In the case of using UFS only, write should be completed after persistence to ensure that the written data will not be lost. Suggest to update the sentence ”This write type ensures that data will be persisted after the write completes“ to ”This write type ensures that data will be persisted before the write completes“ pr-link: Alluxio/alluxio#17536 change-id: cid-76189e192afafd96afe410aeedec73eb65e3e161 --- docs/en/overview/Architecture.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/overview/Architecture.md b/docs/en/overview/Architecture.md index 5cb79f8d9a00..77d9131457f6 100644 --- a/docs/en/overview/Architecture.md +++ b/docs/en/overview/Architecture.md @@ -309,7 +309,7 @@ If you are writing replica with `ASYNC_THROUGH` and all worker with the copies c ### Write to UFS Only (`THROUGH`) With `THROUGH`, data is written to under storage synchronously without being cached to Alluxio -workers. This write type ensures that data will be persisted after the write completes, but the +workers. This write type ensures that data will be persisted before the write completes, but the speed is limited by the under storage throughput. ### Data consistency From da97d73580be21a328b3608e620be4bbe7ec6d2b Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Fri, 16 Jun 2023 11:09:29 +0800 Subject: [PATCH 294/334] [DOCFIX] Fix some descriptions related to the FileSystemMaster module ### What changes are proposed in this pull request? The purpose of this pr is to fix some descriptions related to the FileSystemMaster module. ### Why are the changes needed? In the FileSystemMaster module, there are some inappropriate descriptions or comments, and we should fix them as much as possible. ### Does this PR introduce any user facing changes? 
Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17215 change-id: cid-361c9b28832a3797941987663080f7e2a57d4965 --- .../master/file/DefaultFileSystemMaster.java | 28 +++++++++++-------- .../alluxio/master/file/meta/InodeTree.java | 2 +- .../alluxio/master/scheduler/Scheduler.java | 6 ++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index f640b854af59..38bf0c92a694 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -960,8 +960,9 @@ public FileInfo getFileInfo(AlluxioURI path, GetStatusContext context) FileSystemMasterCommonPOptions.newBuilder() .setTtl(context.getOptions().getCommonOptions().getTtl()) .setTtlAction(context.getOptions().getCommonOptions().getTtlAction()))); - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required. */ boolean run = true; boolean loadMetadata = false; @@ -1131,8 +1132,9 @@ public void listStatus(AlluxioURI path, ListStatusContext context, context.getOptions().setLoadMetadataType(LoadMetadataPType.NEVER); ufsAccessed = true; } - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required. 
*/ DescendantType loadDescendantType; if (context.getOptions().getLoadMetadataType() == LoadMetadataPType.NEVER) { @@ -1519,8 +1521,9 @@ public boolean exists(AlluxioURI path, ExistsContext context) LoadMetadataPOptions.newBuilder() .setCommonOptions(context.getOptions().getCommonOptions()) .setLoadType(context.getOptions().getLoadMetadataType())); - /* - See the comments in #getFileIdInternal for an explanation on why the loop here is required. + /** + * See the comments in {@link #getFileIdInternal(AlluxioURI, boolean)} for an explanation + * on why the loop here is required. */ boolean run = true; boolean loadMetadata = false; @@ -1676,7 +1679,8 @@ public void completeFile(AlluxioURI path, CompleteFileContext context) UnavailableException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"completeFile\" operation has been retried. {}", context); + LOG.warn("A completed \"completeFile\" operation has been retried. OperationContext={}", + context); return; } Metrics.COMPLETE_FILE_OPS.inc(); @@ -1941,7 +1945,8 @@ public FileInfo createFile(AlluxioURI path, CreateFileContext context) BlockInfoException, IOException, FileDoesNotExistException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"createFile\" operation has been retried. {}", context); + LOG.warn("A completed \"createFile\" operation has been retried. OperationContext={}", + context); return getFileInfo(path, GetStatusContext.create(GetStatusPOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1)) @@ -2173,7 +2178,7 @@ public void delete(AlluxioURI path, DeleteContext context) InvalidPathException, AccessControlException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"delete\" operation has been retried. 
{}", context); + LOG.warn("A completed \"delete\" operation has been retried. OperationContext={}", context); return; } Metrics.DELETE_PATHS_OPS.inc(); @@ -2790,7 +2795,8 @@ public long createDirectory(AlluxioURI path, CreateDirectoryContext context) FileDoesNotExistException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"createDirectory\" operation has been retried. {}", context); + LOG.warn("A completed \"createDirectory\" operation has been retried. OperationContext={}", + context); return getFileInfo(path, GetStatusContext.create(GetStatusPOptions.newBuilder() .setCommonOptions(FileSystemMasterCommonPOptions.newBuilder().setSyncIntervalMs(-1)) @@ -2917,7 +2923,7 @@ public void rename(AlluxioURI srcPath, AlluxioURI dstPath, RenameContext context IOException, AccessControlException { if (isOperationComplete(context)) { Metrics.COMPLETED_OPERATION_RETRIED_COUNT.inc(); - LOG.warn("A completed \"rename\" operation has been retried. {}", context); + LOG.warn("A completed \"rename\" operation has been retried. OperationContext={}", context); return; } Metrics.RENAME_PATH_OPS.inc(); diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java index fa54fa301d2c..480e2ee6707a 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeTree.java @@ -191,7 +191,7 @@ public boolean isWrite() { /** * Class for managing the persistent state of the inode tree. All metadata changes must go - * through this class by calling mState.applyAndJournal(context, entry). + * through this class by calling {@link InodeTreePersistentState#applyAndJournal(context, entry)}. 
*/ private final InodeTreePersistentState mState; diff --git a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java index dbdd599a475d..ab230e7715ce 100644 --- a/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java +++ b/core/server/master/src/main/java/alluxio/master/scheduler/Scheduler.java @@ -79,10 +79,10 @@ public final class Scheduler { private final WorkerProvider mWorkerProvider; /** - * Constructor. + * Creates a new instance of {@link Scheduler}. * - * @param workerProvider workerProvider - * @param jobMetaStore jobMetaStore + * @param workerProvider interface for providing worker information and client + * @param jobMetaStore job meta store that store job information */ public Scheduler(WorkerProvider workerProvider, JobMetaStore jobMetaStore) { mWorkerProvider = workerProvider; From f6632c9a6b9976b2064089981e2c4a161dcccf7b Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 16 Jun 2023 11:13:12 +0800 Subject: [PATCH 295/334] Support SteppingThreadSleeper ### What changes are proposed in this pull request? Implements light thread sleeper to support invoke a sleeping sleeper to determine whether need continue to sleep. ### Why are the changes needed? Without this feature, when we need to refresh the interval of Heartbeat thread, it cannot take effect. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17298 change-id: cid-a4c471eb06e6389868278fab088556c8d7a23986 --- .../CronExpressionIntervalSupplier.java | 8 +- .../heartbeat/FixedIntervalSupplier.java | 8 +- .../heartbeat/SleepIntervalSupplier.java | 8 +- .../java/alluxio/heartbeat/SleepingTimer.java | 11 ++- .../src/main/java/alluxio/time/Sleeper.java | 11 +++ .../alluxio/time/SteppingThreadSleeper.java | 88 +++++++++++++++++++ ...ForCronExpressionIntervalSupplierTest.java | 4 +- .../alluxio/heartbeat/SleepingTimerTest.java | 47 +++++++++- 8 files changed, 163 insertions(+), 22 deletions(-) create mode 100644 core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java diff --git a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java index 7efe522aa3ca..64919dacbf5d 100644 --- a/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java +++ b/core/common/src/main/java/alluxio/heartbeat/CronExpressionIntervalSupplier.java @@ -39,9 +39,9 @@ public CronExpressionIntervalSupplier(CronExpression cronExpression, long fixedI } @Override - public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { + public long getNextInterval(long previousTickedMs, long nowTimeStampMillis) { long nextInterval = 0; - long executionTimeMs = nowTimeStampMillis - mPreviousTickedMs; + long executionTimeMs = nowTimeStampMillis - previousTickedMs; if (executionTimeMs < mInterval) { nextInterval = mInterval - executionTimeMs; } @@ -54,8 +54,8 @@ public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { } @Override - public long getRunLimit(long mPreviousTickedMs) { - Date now = Date.from(Instant.ofEpochMilli(mPreviousTickedMs)); + public long getRunLimit(long previousTickedMs) { + Date now = Date.from(Instant.ofEpochMilli(previousTickedMs)); return Duration.between(now.toInstant(), mCron.getNextInvalidTimeAfter(now).toInstant()).toMillis(); 
} diff --git a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java index d366b6d48aef..da816ef8580b 100644 --- a/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java +++ b/core/common/src/main/java/alluxio/heartbeat/FixedIntervalSupplier.java @@ -45,11 +45,11 @@ public FixedIntervalSupplier(long fixedInterval) { } @Override - public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { - if (mPreviousTickedMs == -1) { + public long getNextInterval(long previousTickedMs, long nowTimeStampMillis) { + if (previousTickedMs == -1) { return -1; } - long executionTimeMs = nowTimeStampMillis - mPreviousTickedMs; + long executionTimeMs = nowTimeStampMillis - previousTickedMs; if (executionTimeMs > mInterval) { mLogger.warn("{} last execution took {} ms. Longer than the interval {}", Thread.currentThread().getName(), executionTimeMs, mInterval); @@ -59,7 +59,7 @@ public long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis) { } @Override - public long getRunLimit(long mPreviousTickedMs) { + public long getRunLimit(long previousTickedMs) { return mInterval; } diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java index cde2ddd5ff3f..b022839f3229 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepIntervalSupplier.java @@ -18,17 +18,17 @@ public interface SleepIntervalSupplier { /** * Gets the next interval for sleeping. 
* - * @param mPreviousTickedMs previous ticked time stamp in millisecond + * @param previousTickedMs previous ticked time stamp in millisecond * @param nowTimeStampMillis current time stamp in millisecond * @return the interval to sleep starting from now before next time the timer triggers */ - long getNextInterval(long mPreviousTickedMs, long nowTimeStampMillis); + long getNextInterval(long previousTickedMs, long nowTimeStampMillis); /** * Gets the run limit from previous ticked. * - * @param mPreviousTickedMs previous ticked time stamp in millisecond + * @param previousTickedMs previous ticked time stamp in millisecond * @return the run limit */ - long getRunLimit(long mPreviousTickedMs); + long getRunLimit(long previousTickedMs); } diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index 0bd730d96e14..e1a20b1b9681 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -12,7 +12,7 @@ package alluxio.heartbeat; import alluxio.time.Sleeper; -import alluxio.time.ThreadSleeper; +import alluxio.time.SteppingThreadSleeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +46,7 @@ public class SleepingTimer implements HeartbeatTimer { public SleepingTimer(String threadName, Clock clock, Supplier intervalSupplierSupplier) { this(threadName, LoggerFactory.getLogger(SleepingTimer.class), - clock, ThreadSleeper.INSTANCE, intervalSupplierSupplier); + clock, SteppingThreadSleeper.INSTANCE, intervalSupplierSupplier); } /** @@ -75,10 +75,9 @@ public SleepingTimer(String threadName, Logger logger, Clock clock, Sleeper slee */ @Override public long tick() throws InterruptedException { - long nextInterval = mIntervalSupplier.getNextInterval(mPreviousTickedMs, mClock.millis()); - if (nextInterval > 0) { - mSleeper.sleep(Duration.ofMillis(nextInterval)); - } + long now = 
mClock.millis(); + mSleeper.sleep( + () -> Duration.ofMillis(mIntervalSupplier.getNextInterval(mPreviousTickedMs, now))); mPreviousTickedMs = mClock.millis(); return mIntervalSupplier.getRunLimit(mPreviousTickedMs); } diff --git a/core/common/src/main/java/alluxio/time/Sleeper.java b/core/common/src/main/java/alluxio/time/Sleeper.java index cc972c7cd97e..27db10a8c14a 100644 --- a/core/common/src/main/java/alluxio/time/Sleeper.java +++ b/core/common/src/main/java/alluxio/time/Sleeper.java @@ -12,6 +12,7 @@ package alluxio.time; import java.time.Duration; +import java.util.function.Supplier; /** * An interface for a utility which provides a sleep method. @@ -25,4 +26,14 @@ public interface Sleeper { * @throws InterruptedException if the sleep is interrupted */ void sleep(Duration duration) throws InterruptedException; + + /** + * Sleeps for given duration but period wake-up by new interval supplier. + * @param durationSupplier New sleep interval supplier + * @throws InterruptedException + */ + default void sleep(Supplier durationSupplier) + throws InterruptedException { + sleep(durationSupplier.get()); + } } diff --git a/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java b/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java new file mode 100644 index 000000000000..c1e8800f81ef --- /dev/null +++ b/core/common/src/main/java/alluxio/time/SteppingThreadSleeper.java @@ -0,0 +1,88 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.time; + +import alluxio.Constants; + +import com.google.common.annotations.VisibleForTesting; + +import java.time.Clock; +import java.time.Duration; +import java.util.function.Supplier; + +/** + * A progressive sleeper that wakes up multiple times during sleep to check if the requested sleep + * duration has changed, and adjust its sleep duration accordingly. + * */ +public final class SteppingThreadSleeper implements Sleeper { + private long mSleepStepMs = Constants.MINUTE; + + public static final SteppingThreadSleeper INSTANCE = new SteppingThreadSleeper(); + + private final Sleeper mInternalSleeper; + private final Clock mClock; + + private SteppingThreadSleeper() { + mInternalSleeper = ThreadSleeper.INSTANCE; + mClock = Clock.systemUTC(); + } + + /** + * Creates a new instance of {@link SteppingThreadSleeper}. + * @param internalSleeper the internal sleeper + * @param clock for telling the current time + */ + @VisibleForTesting + public SteppingThreadSleeper(Sleeper internalSleeper, Clock clock) { + mInternalSleeper = internalSleeper; + mClock = clock; + } + + @Override + public void sleep(Duration duration) throws InterruptedException { + mInternalSleeper.sleep(duration); + } + + @Override + public void sleep(Supplier durationSupplier) throws InterruptedException { + Duration duration = durationSupplier.get(); + if (duration.toMillis() < 0) { + return; + } + if (duration.toMillis() < mSleepStepMs) { + sleep(duration); + return; + } + long startSleepMs = mClock.millis(); + long sleepTo = startSleepMs + duration.toMillis(); + long timeNow; + while ((timeNow = mClock.millis()) < sleepTo) { + long sleepTime = Math.min(sleepTo - timeNow, mSleepStepMs); + mInternalSleeper.sleep(Duration.ofMillis(sleepTime)); + + long newInterval = durationSupplier.get().toMillis(); + if (newInterval >= 0) { + sleepTo = startSleepMs + newInterval; + } + } + } + + /** + * Sets the sleep step. 
+ * + * @param sleepStepMs the sleep step + */ + @VisibleForTesting + public void setSleepStepMs(long sleepStepMs) { + mSleepStepMs = sleepStepMs; + } +} diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java index 81d9d5e4bc06..667eb05843eb 100644 --- a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerForCronExpressionIntervalSupplierTest.java @@ -18,6 +18,7 @@ import alluxio.Constants; import alluxio.clock.ManualClock; import alluxio.time.Sleeper; +import alluxio.time.SteppingThreadSleeper; import org.apache.logging.log4j.core.util.CronExpression; import org.junit.Assert; @@ -63,7 +64,8 @@ public void before() throws InterruptedException { @Test public void maintainInterval() throws Exception { SleepingTimer timer = - new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), () -> { try { return new CronExpressionIntervalSupplier( diff --git a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java index 6a4f79447574..fb79c749cbca 100644 --- a/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java +++ b/core/common/src/test/java/alluxio/heartbeat/SleepingTimerTest.java @@ -17,15 +17,21 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import alluxio.Constants; import alluxio.clock.ManualClock; +import alluxio.clock.SystemClock; import alluxio.time.Sleeper; +import alluxio.time.SteppingThreadSleeper; +import alluxio.time.ThreadSleeper; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; import 
org.slf4j.Logger; import java.time.Duration; +import java.util.concurrent.atomic.AtomicLong; /** * Unit tests for {@link SleepingTimer}. @@ -47,7 +53,8 @@ public void before() { @Test public void warnWhenExecutionTakesLongerThanInterval() throws Exception { SleepingTimer timer = - new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), () -> new FixedIntervalSupplier(INTERVAL_MS, mMockLogger)); timer.tick(); @@ -61,7 +68,8 @@ public void warnWhenExecutionTakesLongerThanInterval() throws Exception { @Test public void sleepForSpecifiedInterval() throws Exception { final SleepingTimer timer = - new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), () -> new FixedIntervalSupplier(INTERVAL_MS)); timer.tick(); // first tick won't sleep verify(mMockSleeper, times(0)).sleep(any(Duration.class)); @@ -77,7 +85,8 @@ public void sleepForSpecifiedInterval() throws Exception { @Test public void maintainInterval() throws Exception { SleepingTimer stimer = - new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, mMockSleeper, + new SleepingTimer(THREAD_NAME, mMockLogger, mFakeClock, + new SteppingThreadSleeper(mMockSleeper, mFakeClock), () -> new FixedIntervalSupplier(INTERVAL_MS)); stimer.tick(); @@ -85,4 +94,36 @@ public void maintainInterval() throws Exception { stimer.tick(); verify(mMockSleeper).sleep(Duration.ofMillis(INTERVAL_MS - (INTERVAL_MS / 3))); } + + @Test + public void updateIntervalForSteppingTimer() throws Exception { + AtomicLong interval = new AtomicLong(10 * Constants.SECOND_MS); + AtomicLong tickCount = new AtomicLong(0L); + SteppingThreadSleeper sts = + new SteppingThreadSleeper(ThreadSleeper.INSTANCE, SystemClock.systemUTC()); + sts.setSleepStepMs(Constants.SECOND_MS); + SleepingTimer stimer = + new 
SleepingTimer(THREAD_NAME, mMockLogger, SystemClock.systemUTC(), sts, + () -> new FixedIntervalSupplier(interval.get())); + new Thread(() -> { + while (true) { + try { + stimer.tick(); + tickCount.incrementAndGet(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }).start(); + long oldInterval = interval.get(); + Thread.sleep(oldInterval / 2); + long tickCountInit = tickCount.get(); + // scale in the interval + interval.set(oldInterval / 5); + stimer.update(); + Thread.sleep(oldInterval); + long newTickCount = tickCount.get(); + Assert.assertTrue("current tickCount = " + + newTickCount + " is not >= 5 + " + tickCountInit, newTickCount >= tickCountInit + 5); + } } From 545ecd4d36a9b00f21a5e8bed3ec3e6910b5ab22 Mon Sep 17 00:00:00 2001 From: Tyler Crain Date: Fri, 16 Jun 2023 14:49:43 -0700 Subject: [PATCH 296/334] Fix S3 UFS executor service thread leak ### What changes are proposed in this pull request? Calls shutdown on the executor service in the S3 UFS class. ### Why are the changes needed? If shutdown is not called the threads will not be garbage collected. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#15748 change-id: cid-86d16e80118882044bf529a619b7915c8451eb03 --- .../main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java index 953d4369ff99..35b8c34f3d18 100644 --- a/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java +++ b/underfs/s3a/src/main/java/alluxio/underfs/s3a/S3AUnderFileSystem.java @@ -488,6 +488,11 @@ public void cleanup() { mManager.abortMultipartUploads(mBucketName, cleanBefore); } + @Override + public void close() { + mExecutor.shutdown(); + } + @Override protected boolean copyObject(String src, String dst) { LOG.debug("Copying {} to {}", src, dst); From ec066dccaabf44a95c2fffb72cc9c4eadf274bab Mon Sep 17 00:00:00 2001 From: alluxio-bot Date: Tue, 27 Jun 2023 13:35:56 -0700 Subject: [PATCH 297/334] Simplify `elect` command Cherry-pick of existing commit. 
orig-pr: Alluxio/alluxio#17598 orig-commit: Alluxio/alluxio@fd5098ec8c28cde138af2c452695d184512c4aac orig-commit-author: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> pr-link: Alluxio/alluxio#17642 change-id: cid-1de7738fcf7f96b1e1b1ca509c04f914f835c447 --- .../journal/raft/RaftJournalSystem.java | 111 ++++-------------- .../master/journal/DefaultJournalMaster.java | 9 -- .../alluxio/master/journal/JournalMaster.java | 10 +- .../JournalMasterClientServiceHandler.java | 47 +++++++- .../fsadmin/journal/QuorumElectCommand.java | 24 +--- .../command/QuorumCommandIntegrationTest.java | 8 +- ...rnalIntegrationTestTransferLeadership.java | 34 +----- 7 files changed, 73 insertions(+), 170 deletions(-) diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java index 37ecc043c3cc..8040346ceb46 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java @@ -25,7 +25,6 @@ import alluxio.grpc.NodeState; import alluxio.grpc.QuorumServerInfo; import alluxio.grpc.QuorumServerState; -import alluxio.grpc.TransferLeaderMessage; import alluxio.master.Master; import alluxio.master.PrimarySelector; import alluxio.master.StateLockManager; @@ -109,7 +108,6 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -184,8 +182,6 @@ public class RaftJournalSystem extends AbstractJournalSystem { private final File mPath; private final InetSocketAddress mLocalAddress; private final List mClusterAddresses; - /** Controls whether the quorum leadership can be transferred. 
*/ - private final AtomicBoolean mTransferLeaderAllowed = new AtomicBoolean(false); private final Map mRatisMetricsMap = new ConcurrentHashMap<>(); @@ -244,7 +240,6 @@ public class RaftJournalSystem extends AbstractJournalSystem { private final ClientId mRawClientId = ClientId.randomId(); private RaftGroup mRaftGroup; private RaftPeerId mPeerId; - private final Map mErrorMessages = new ConcurrentHashMap<>(); static long nextCallId() { return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE; @@ -550,7 +545,6 @@ public synchronized void gainPrimacy() { mRaftJournalWriter = new RaftJournalWriter(nextSN, client); mAsyncJournalWriter .set(new AsyncJournalWriter(mRaftJournalWriter, () -> getJournalSinks(null))); - mTransferLeaderAllowed.set(true); super.registerMetrics(); LOG.info("Gained primacy."); } @@ -562,7 +556,6 @@ public synchronized void losePrimacy() { // Avoid duplicate shut down Ratis server return; } - mTransferLeaderAllowed.set(false); try { // Close async writer first to flush pending entries. mAsyncJournalWriter.get().close(); @@ -985,7 +978,7 @@ public synchronized void removeQuorumServer(NetAddress serverNetAddress) throws */ public synchronized void resetPriorities() throws IOException { List resetPeers = new ArrayList<>(); - final int NEUTRAL_PRIORITY = 1; + final int NEUTRAL_PRIORITY = 0; for (RaftPeer peer : mRaftGroup.getPeers()) { resetPeers.add( RaftPeer.newBuilder(peer) @@ -996,7 +989,7 @@ public synchronized void resetPriorities() throws IOException { LOG.info("Resetting RaftPeer priorities"); try (RaftClient client = createClient()) { RaftClientReply reply = client.admin().setConfiguration(resetPeers); - processReply(reply, "failed to reset master priorities to 1"); + processReply(reply, "failed to reset master priorities to 0"); } } @@ -1004,81 +997,32 @@ public synchronized void resetPriorities() throws IOException { * Transfers the leadership of the quorum to another server. 
* * @param newLeaderNetAddress the address of the server - * @return the guid of transfer leader command + * @return error message if an error occurs or empty string if no error occurred */ public synchronized String transferLeadership(NetAddress newLeaderNetAddress) { - final boolean allowed = mTransferLeaderAllowed.getAndSet(false); - String transferId = UUID.randomUUID().toString(); - if (!allowed) { - String msg = "transfer is not allowed at the moment because the master is " - + (mRaftJournalWriter == null ? "still gaining primacy" : "already transferring the ") - + "leadership"; - mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(msg).build()); - return transferId; + InetSocketAddress serverAddress = InetSocketAddress + .createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort()); + Collection peers = mRaftGroup.getPeers(); + // The NetUtil function is used by Ratis to convert InetSocketAddress to string + String strAddr = NetUtils.address2String(serverAddress); + // if you cannot find the address in the quorum, return error message. + if (peers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) { + return String.format("<%s> is not part of the quorum <%s>.", + strAddr, peers.stream().map(RaftPeer::getAddress).collect(Collectors.toList())); } - try { - InetSocketAddress serverAddress = InetSocketAddress - .createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort()); - List oldPeers = new ArrayList<>(mRaftGroup.getPeers()); - // The NetUtil function is used by Ratis to convert InetSocketAddress to string - String strAddr = NetUtils.address2String(serverAddress); - // if you cannot find the address in the quorum, throw exception. 
- if (oldPeers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) { - throw new IOException(String.format("<%s> is not part of the quorum <%s>.", - strAddr, oldPeers.stream().map(RaftPeer::getAddress).collect(Collectors.toList()))); - } - if (strAddr.equals(mRaftGroup.getPeer(mPeerId).getAddress())) { - throw new IOException(String.format("%s is already the leader", strAddr)); - } - RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress); - /* update priorities to enable transfer */ - List peersWithNewPriorities = new ArrayList<>(); - for (RaftPeer peer : oldPeers) { - peersWithNewPriorities.add( - RaftPeer.newBuilder(peer) - .setPriority(peer.getId().equals(newLeaderPeerId) ? 2 : 1) - .build() - ); - } - try (RaftClient client = createClient()) { - String stringPeers = "[" + peersWithNewPriorities.stream().map(RaftPeer::toString) - .collect(Collectors.joining(", ")) + "]"; - LOG.info("Applying new peer state before transferring leadership: {}", stringPeers); - RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriorities); - processReply(reply, "failed to set master priorities before initiating election"); - } - /* transfer leadership */ - LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", - serverAddress, newLeaderPeerId); - // fire and forget: need to immediately return as the master will shut down its RPC servers - // once the TransferLeadershipRequest is initiated. 
- final int SLEEP_TIME_MS = 3_000; - final int TRANSFER_LEADER_WAIT_MS = 30_000; - new Thread(() -> { - try (RaftClient client = createClient()) { - Thread.sleep(SLEEP_TIME_MS); - RaftClientReply reply1 = client.admin().transferLeadership(newLeaderPeerId, - TRANSFER_LEADER_WAIT_MS); - processReply(reply1, "election failed"); - } catch (Throwable t) { - LOG.error("caught an error when executing transfer: {}", t.getMessage()); - // we only allow transfers again if the transfer is unsuccessful: a success means it - // will soon lose primacy - mTransferLeaderAllowed.set(true); - mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder() - .setMsg(t.getMessage()).build()); - /* checking the transfer happens in {@link QuorumElectCommand} */ - } - }).start(); - LOG.info("Transferring leadership initiated"); + RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress); + /* transfer leadership */ + LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", + serverAddress, newLeaderPeerId); + try (RaftClient client = createClient()) { + RaftClientReply reply1 = client.admin().transferLeadership(newLeaderPeerId, 30_000); + processReply(reply1, "election failed"); } catch (Throwable t) { - mTransferLeaderAllowed.set(true); LOG.warn(t.getMessage()); - mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder() - .setMsg(t.getMessage()).build()); + return t.getMessage(); } - return transferId; + return ""; } /** @@ -1095,19 +1039,6 @@ private void processReply(RaftClientReply reply, String msgToUser) throws IOExce } } - /** - * Gets exception message throwing when transfer leader. 
- * @param transferId the guid of transferLeader command - * @return the exception - */ - public synchronized TransferLeaderMessage getTransferLeaderMessage(String transferId) { - if (mErrorMessages.get(transferId) != null) { - return mErrorMessages.get(transferId); - } else { - return TransferLeaderMessage.newBuilder().setMsg("").build(); - } - } - /** * Adds a server to the quorum. * diff --git a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java index eae8d452c37f..6ef347aab38f 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java @@ -15,7 +15,6 @@ import alluxio.clock.SystemClock; import alluxio.grpc.GetNodeStatePResponse; import alluxio.grpc.GetQuorumInfoPResponse; -import alluxio.grpc.GetTransferLeaderMessagePResponse; import alluxio.grpc.GrpcService; import alluxio.grpc.JournalDomain; import alluxio.grpc.NetAddress; @@ -90,14 +89,6 @@ public void resetPriorities() throws IOException { ((RaftJournalSystem) mJournalSystem).resetPriorities(); } - @Override - public GetTransferLeaderMessagePResponse getTransferLeaderMessage(String transferId) { - checkQuorumOpSupported(); - return GetTransferLeaderMessagePResponse.newBuilder() - .setTransMsg(((RaftJournalSystem) mJournalSystem).getTransferLeaderMessage(transferId)) - .build(); - } - @Override public GetNodeStatePResponse getNodeState() { return GetNodeStatePResponse.newBuilder() diff --git a/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java index a3eb7d19659b..4ae643af9cc8 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java @@ -13,7 +13,6 @@ import 
alluxio.grpc.GetNodeStatePResponse; import alluxio.grpc.GetQuorumInfoPResponse; -import alluxio.grpc.GetTransferLeaderMessagePResponse; import alluxio.grpc.NetAddress; import alluxio.master.Master; @@ -46,7 +45,7 @@ public interface JournalMaster extends Master { * {@link alluxio.master.journal.JournalType#EMBEDDED} journal. * * @param newLeaderAddress server address to remove from quorum - * @return the guid of transfer leader command + * @return an error message if an error occurred, otherwise empty string */ String transferLeadership(NetAddress newLeaderAddress); @@ -57,13 +56,6 @@ public interface JournalMaster extends Master { */ void resetPriorities() throws IOException; - /** - * Gets exception messages thrown when transferring the leader. - * @param transferId the guid of transferLeader command - * @return exception message - */ - GetTransferLeaderMessagePResponse getTransferLeaderMessage(String transferId); - /** * Gets the node state. This endpoint is available for both UFS and embedded journals. * If HA mode is turn off, the node state will always be returned as PRIMARY. 
diff --git a/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java index 37da2fcf39d8..5ea74a1b9a45 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java @@ -23,13 +23,19 @@ import alluxio.grpc.RemoveQuorumServerPResponse; import alluxio.grpc.ResetPrioritiesPRequest; import alluxio.grpc.ResetPrioritiesPResponse; +import alluxio.grpc.TransferLeaderMessage; import alluxio.grpc.TransferLeadershipPRequest; import alluxio.grpc.TransferLeadershipPResponse; +import io.grpc.StatusException; import io.grpc.stub.StreamObserver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; + /** * This class is a gRPC handler for journal master RPCs invoked by an Alluxio client. 
*/ @@ -38,6 +44,8 @@ public class JournalMasterClientServiceHandler private static final Logger LOG = LoggerFactory.getLogger(JournalMasterClientServiceHandler.class); + private final Map mTransferLeaderMessages = new ConcurrentHashMap<>(); + private final JournalMaster mJournalMaster; /** @@ -68,10 +76,34 @@ public void removeQuorumServer(RemoveQuorumServerPRequest request, @Override public void transferLeadership(TransferLeadershipPRequest request, StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> { - String transferId = mJournalMaster.transferLeadership(request.getServerAddress()); - return TransferLeadershipPResponse.newBuilder().setTransferId(transferId).build(); - }, "transferLeadership", "request=%s", responseObserver, request); + try { + // using RpcUtils wrapper for metrics tracking + RpcUtils.callAndReturn(LOG, () -> { + String transferId = UUID.randomUUID().toString(); + // atomically reserve UUID in map with empty message: if not in use (which is good), it + // will return null + while (mTransferLeaderMessages.putIfAbsent(transferId, "") != null) { + transferId = UUID.randomUUID().toString(); + } + String message; + try { + // return transfer id to caller before initiating transfer of leadership. 
this is because + // the leader will close its gRPC server when being demoted + responseObserver.onNext( + TransferLeadershipPResponse.newBuilder().setTransferId(transferId).build()); + responseObserver.onCompleted(); + // initiate transfer after replying with transfer ID + message = mJournalMaster.transferLeadership(request.getServerAddress()); + } catch (Throwable t) { + message = t.getMessage(); + } + mTransferLeaderMessages.put(transferId, message); + return null; + }, "transferLeadership", false, "request=%s", request); + } catch (StatusException e) { + // throws only if above callable throws, which it does not + LOG.warn("error thrown in transferLeadership rpc, should not be possible", e); + } } @Override @@ -86,8 +118,11 @@ public void resetPriorities(ResetPrioritiesPRequest request, @Override public void getTransferLeaderMessage(GetTransferLeaderMessagePRequest request, StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> mJournalMaster.getTransferLeaderMessage(request.getTransferId()), - "GetTransferLeaderMessage", "request=%s", responseObserver, request); + RpcUtils.call(LOG, () -> GetTransferLeaderMessagePResponse.newBuilder() + .setTransMsg(TransferLeaderMessage.newBuilder() + .setMsg(mTransferLeaderMessages.getOrDefault(request.getTransferId(), ""))) + .build(), + "GetTransferLeaderMessage", "request=%s", responseObserver, request); } @Override diff --git a/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java index 7c8e13981734..59c82552170d 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java @@ -41,10 +41,6 @@ public class QuorumElectCommand extends AbstractFsAdminCommand { public static final String TRANSFER_INIT = "Initiating transfer of leadership to %s"; public static final String TRANSFER_SUCCESS = "Successfully elected %s as the new 
leader"; public static final String TRANSFER_FAILED = "Failed to elect %s as the new leader: %s"; - public static final String RESET_INIT = "Resetting priorities of masters after %s transfer of " - + "leadership"; - public static final String RESET_SUCCESS = "Quorum priorities were reset to 1"; - public static final String RESET_FAILED = "Quorum priorities failed to be reset: %s"; /** * @param context fsadmin command context @@ -67,7 +63,6 @@ public int run(CommandLine cl) throws IOException { JournalMasterClient jmClient = mMasterJournalMasterClient; String serverAddress = cl.getOptionValue(ADDRESS_OPTION_NAME); NetAddress address = QuorumCommand.stringToAddress(serverAddress); - boolean success = false; try { mPrintStream.println(String.format(TRANSFER_INIT, serverAddress)); String transferId = jmClient.transferLeadership(address); @@ -84,9 +79,8 @@ public int run(CommandLine cl) throws IOException { GetQuorumInfoPResponse quorumInfo = jmClient.getQuorumInfo(); Optional leadingMasterInfoOpt = quorumInfo.getServerInfoList().stream() .filter(QuorumServerInfo::getIsLeader).findFirst(); - NetAddress leaderAddress = leadingMasterInfoOpt.isPresent() - ? leadingMasterInfoOpt.get().getServerAddress() : null; - return address.equals(leaderAddress); + return leadingMasterInfoOpt.isPresent() + && address.equals(leadingMasterInfoOpt.get().getServerAddress()); } catch (IOException e) { return false; } @@ -96,21 +90,11 @@ public int run(CommandLine cl) throws IOException { throw new Exception(errorMessage.get()); } mPrintStream.println(String.format(TRANSFER_SUCCESS, serverAddress)); - success = true; } catch (Exception e) { mPrintStream.println(String.format(TRANSFER_FAILED, serverAddress, e.getMessage())); + return -1; } - // reset priorities regardless of transfer success - try { - mPrintStream.println(String.format(RESET_INIT, success ? 
"successful" : "failed")); - jmClient.resetPriorities(); - mPrintStream.println(RESET_SUCCESS); - } catch (IOException e) { - mPrintStream.println(String.format(RESET_FAILED, e)); - success = false; - } - - return success ? 0 : -1; + return 0; } @Override diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java index 11268b15d6f1..191903638cf2 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java @@ -208,11 +208,9 @@ public void elect() throws Exception { mOutput.reset(); shell.run("journal", "quorum", "elect", "-address" , newLeaderAddr); String output = mOutput.toString().trim(); - String expected = String.format("%s\n%s\n%s\n%s", + String expected = String.format("%s\n%s", String.format(QuorumElectCommand.TRANSFER_INIT, newLeaderAddr), - String.format(QuorumElectCommand.TRANSFER_SUCCESS, newLeaderAddr), - String.format(QuorumElectCommand.RESET_INIT, "successful"), - QuorumElectCommand.RESET_SUCCESS); + String.format(QuorumElectCommand.TRANSFER_SUCCESS, newLeaderAddr)); Assert.assertEquals(expected, output); } mCluster.notifySuccess(); @@ -246,7 +244,7 @@ public void infoAfterElect() throws Exception { shell.run("journal", "quorum", "info", "-domain", "MASTER"); String output = mOutput.toString().trim(); for (MasterNetAddress masterAddr : mCluster.getMasterAddresses()) { - String expected = String.format(QuorumInfoCommand.OUTPUT_SERVER_INFO, "AVAILABLE", "1", + String expected = String.format(QuorumInfoCommand.OUTPUT_SERVER_INFO, "AVAILABLE", "0", String.format("%s:%d", masterAddr.getHostname(), masterAddr.getEmbeddedJournalPort())); Assert.assertTrue(output.contains(expected.trim())); } diff --git 
a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java index 7b6eba4732b2..8d84e5674ff5 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java @@ -77,34 +77,6 @@ public void repeatedTransferLeadership() throws Exception { mCluster.notifySuccess(); } - @Test - public void transferWhenAlreadyTransferring() throws Exception { - mCluster = MultiProcessCluster - .newBuilder(PortCoordination.EMBEDDED_JOURNAL_ALREADY_TRANSFERRING) - .setClusterName("EmbeddedJournalTransferLeadership_transferWhenAlreadyTransferring") - .setNumMasters(NUM_MASTERS) - .setNumWorkers(NUM_WORKERS) - .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED) - .addProperty(PropertyKey.MASTER_JOURNAL_FLUSH_TIMEOUT_MS, "5min") - .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "750ms") - .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "1500ms") - .build(); - mCluster.start(); - - int newLeaderIdx = (mCluster.getPrimaryMasterIndex(MASTER_INDEX_WAIT_TIME) + 1) % NUM_MASTERS; - // `getPrimaryMasterIndex` uses the same `mMasterAddresses` variable as getMasterAddresses - // we can therefore access to the new leader's address this way - MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); - NetAddress netAddress = masterEBJAddr2NetAddr(newLeaderAddr); - mCluster.getJournalMasterClientForMaster().transferLeadership(netAddress); - // this second call should throw an exception - String transferId = mCluster.getJournalMasterClientForMaster().transferLeadership(netAddress); - String exceptionMessage = mCluster.getJournalMasterClientForMaster() - 
.getTransferLeaderMessage(transferId).getTransMsg().getMsg(); - Assert.assertFalse(exceptionMessage.isEmpty()); - mCluster.notifySuccess(); - } - @Test public void transferLeadershipOutsideCluster() throws Exception { mCluster = MultiProcessCluster.newBuilder(PortCoordination.EMBEDDED_JOURNAL_OUTSIDE_CLUSTER) @@ -206,11 +178,11 @@ public void resetPriorities() throws Exception { MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); transferAndWait(newLeaderAddr); match = mCluster.getJournalMasterClientForMaster().getQuorumInfo().getServerInfoList() - .stream().allMatch(info -> info.getPriority() == (info.getIsLeader() ? 2 : 1)); + .stream().allMatch(info -> info.getPriority() == 0); Assert.assertTrue(match); mCluster.getJournalMasterClientForMaster().resetPriorities(); match = mCluster.getJournalMasterClientForMaster().getQuorumInfo().getServerInfoList() - .stream().allMatch(info -> info.getPriority() == 1); + .stream().allMatch(info -> info.getPriority() == 0); Assert.assertTrue(match); } mCluster.notifySuccess(); @@ -234,7 +206,7 @@ public void transferToSelfThenToOther() throws Exception { String transferId = transferAndWait(leaderAddr); GetTransferLeaderMessagePResponse transferLeaderMessage = mCluster.getJournalMasterClientForMaster().getTransferLeaderMessage(transferId); - Assert.assertFalse(transferLeaderMessage.getTransMsg().getMsg().isEmpty()); + Assert.assertTrue(transferLeaderMessage.getTransMsg().getMsg().isEmpty()); int newLeaderIdx = (leaderIdx + 1) % NUM_MASTERS; MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); From 1e5eebe57afbc83618b5d8134a4d927dc3ec6ada Mon Sep 17 00:00:00 2001 From: Yaolong Liu Date: Fri, 30 Jun 2023 11:25:10 +0800 Subject: [PATCH 298/334] Fix deadlock issue when master process exit ### What changes are proposed in this pull request? Fix deadlock issue when master process exit ### Why are the changes needed? 
``` stackTrace: java.lang.Thread.State: BLOCKED (on object monitor) at java.lang.Shutdown.exit(java.base@11.0.12-ga/Shutdown.java:173) - **waiting to lock <0x00007f55e98d1920> (a java.lang.Class for java.lang.Shutdown)** at java.lang.Runtime.exit(java.base@11.0.12-ga/Runtime.java:116) at java.lang.System.exit(java.base@11.0.12-ga/System.java:1752) at alluxio.ProcessUtils.fatalError(ProcessUtils.java:83) at alluxio.ProcessUtils.fatalError(ProcessUtils.java:63) at alluxio.master.journal.MasterJournalContext.waitForJournalFlush(MasterJournalContext.java:99) at alluxio.master.journal.MasterJournalContext.close(MasterJournalContext.java:109) - locked <0x00007f5602000010> (a alluxio.master.journal.MasterJournalContext) at alluxio.master.journal.StateChangeJournalContext.close(StateChangeJournalContext.java:55) at alluxio.master.block.DefaultBlockMaster.getNewContainerId(DefaultBlockMaster.java:906) - locked <0x00007f594a000298> (a alluxio.master.block.BlockContainerIdGenerator) at alluxio.master.file.meta.InodeDirectoryIdGenerator.initialize(InodeDirectoryIdGenerator.java:83) at alluxio.master.file.meta.InodeDirectoryIdGenerator.getNewDirectoryId(InodeDirectoryIdGenerator.java:57) - **locked <0x00007f55e197de68> (a alluxio.master.file.meta.InodeDirectoryIdGenerator)** at alluxio.master.file.meta.InodeTree.createPath(InodeTree.java:979) at alluxio.master.file.DefaultFileSystemMaster.createDirectoryInternal(DefaultFileSystemMaster.java:2746) at alluxio.master.file.InodeSyncStream.loadDirectoryMetadataInternal(InodeSyncStream.java:1374) at alluxio.master.file.InodeSyncStream.loadDirectoryMetadata(InodeSyncStream.java:1294) at alluxio.master.file.TxInodeSyncStream.concurrentLoadMetadata(TxInodeSyncStream.java:123) at alluxio.master.file.TxInodeSyncStream.loadMetadataForPath(TxInodeSyncStream.java:98) at alluxio.master.file.InodeSyncStream.syncInodeMetadata(InodeSyncStream.java:743) at alluxio.master.file.InodeSyncStream.syncInternal(InodeSyncStream.java:491) at 
alluxio.master.file.InodeSyncStream.sync(InodeSyncStream.java:409) at alluxio.master.file.DefaultFileSystemMaster.syncMetadata(DefaultFileSystemMaster.java:4075) at alluxio.master.file.TxFileSystemMaster.syncMetadata(TxFileSystemMaster.java:298) at alluxio.master.file.DefaultFileSystemMaster.listStatus(DefaultFileSystemMaster.java:1111) at alluxio.master.file.TxFileSystemMaster.listStatus(TxFileSystemMaster.java:827) ... ``` ``` stackTrace: java.lang.Thread.State: WAITING (on object monitor) at java.lang.Object.wait(java.base@11.0.12-ga/Native Method) - waiting on at java.lang.Thread.join(java.base@11.0.12-ga/Thread.java:1300) - waiting to re-lock in wait() <0x00007f55e98d0918> (a java.lang.Thread) at java.lang.Thread.join(java.base@11.0.12-ga/Thread.java:1375) at java.lang.ApplicationShutdownHooks.runHooks(java.base@11.0.12-ga/ApplicationShutdownHooks.java:107) at java.lang.ApplicationShutdownHooks$1.run(java.base@11.0.12-ga/ApplicationShutdownHooks.java:46) at java.lang.Shutdown.runHooks(java.base@11.0.12-ga/Shutdown.java:130) at java.lang.Shutdown.exit(java.base@11.0.12-ga/Shutdown.java:174) - locked <0x00007f55e98d1920> (a java.lang.Class for java.lang.Shutdown) at java.lang.Runtime.exit(java.base@11.0.12-ga/Runtime.java:116) at java.lang.System.exit(java.base@11.0.12-ga/System.java:1752) at alluxio.ProcessUtils.fatalError(ProcessUtils.java:83) at alluxio.ProcessUtils.fatalError(ProcessUtils.java:63) at alluxio.master.journal.MasterJournalContext.waitForJournalFlush(MasterJournalContext.java:99) at alluxio.master.journal.MasterJournalContext.close(MasterJournalContext.java:109) - locked <0x00007f5c0a60bda0> (a alluxio.master.journal.MasterJournalContext) at alluxio.master.journal.StateChangeJournalContext.close(StateChangeJournalContext.java:55) at alluxio.master.journal.FileSystemMergeJournalContext.close(FileSystemMergeJournalContext.java:90) - locked <0x00007f5c0a60bde0> (a alluxio.master.journal.FileSystemMergeJournalContext) at 
alluxio.master.file.RpcContext.closeQuietly(RpcContext.java:141) at alluxio.master.file.RpcContext.close(RpcContext.java:129) at alluxio.master.file.DefaultFileSystemMaster.listStatus(DefaultFileSystemMaster.java:1227) at alluxio.master.file.TxFileSystemMaster.listStatus(TxFileSystemMaster.java:827) ``` ``` java.lang.Thread.State: BLOCKED (on object monitor) at alluxio.master.file.meta.InodeDirectoryIdGenerator.peekDirectoryId(InodeDirectoryIdGenerator.java:76) - **waiting to lock <0x00007f55e197de68> (a alluxio.master.file.meta.InodeDirectoryIdGenerator)** at alluxio.master.file.DefaultFileSystemMaster.stop(DefaultFileSystemMaster.java:787) at alluxio.master.file.TxFileSystemMaster.stop(TxFileSystemMaster.java:245) at alluxio.master.AbstractMaster.close(AbstractMaster.java:156) at alluxio.master.file.DefaultFileSystemMaster.close(DefaultFileSystemMaster.java:800) at alluxio.master.file.TxFileSystemMaster.close(TxFileSystemMaster.java:581) at alluxio.Registry.close(Registry.java:156) at alluxio.master.AlluxioMasterProcess.stop(AlluxioMasterProcess.java:412) - locked <0x00007f55e51e2ba8> (a java.util.concurrent.atomic.AtomicBoolean) at alluxio.ProcessUtils.lambda$stopProcessOnShutdown$0(ProcessUtils.java:98) at alluxio.ProcessUtils$$Lambda$363/0x00007f54f02dc840.run(Unknown Source) at java.lang.Thread.run(java.base@11.0.12-ga/Thread.java:829) ``` The blocked listStatus thread(called thread1) wait to get lock <0x00007f55e98d1920> , while it is owned by another wating listStatus thread(called thread2) which also want to exit, thread2 wait the hook process(alluxio-process-shutdown-hook) finished and then continue exit. The alluxio-process-shutdown-hook wait to get lock <0x00007f55e197de68>, which is owned by thread1. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17628 change-id: cid-d077517ba20445ebe95520a1710c173b41810f6d --- .../master/file/meta/InodeDirectoryIdGenerator.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java b/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java index bf4f0869333e..b52a5454fe31 100644 --- a/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java +++ b/core/server/master/src/main/java/alluxio/master/file/meta/InodeDirectoryIdGenerator.java @@ -72,9 +72,10 @@ synchronized long getNewDirectoryId(JournalContext context) throws UnavailableEx /** * @return the next directory id */ - public synchronized long peekDirectoryId() { - long containerId = mNextDirectoryId.getContainerId(); - long sequenceNumber = mNextDirectoryId.getSequenceNumber(); + public long peekDirectoryId() { + DirectoryId directoryId = mNextDirectoryId; + long containerId = directoryId.getContainerId(); + long sequenceNumber = directoryId.getSequenceNumber(); return BlockId.createBlockId(containerId, sequenceNumber); } From cfcda18fe8321d31f27cd87c1e1693cb0db3c7dc Mon Sep 17 00:00:00 2001 From: secfree Date: Mon, 3 Jul 2023 09:09:32 +0800 Subject: [PATCH 299/334] Fix local cache identifier to solve the local cache consistent issue ### What changes are proposed in this pull request? Fix local cache identifier ### Why are the changes needed? If not including `modification time`, it may have consistency issues. ### Does this PR introduce any user facing changes? 
NO pr-link: Alluxio/alluxio#17514 change-id: cid-c9c2e91e53d4a95d4f3a165918fc23c02f623891 --- .../java/alluxio/hadoop/LocalCacheFileSystem.java | 12 ++++++++++-- .../src/main/java/alluxio/conf/PropertyKey.java | 10 ++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java index 3781a3b1cfe2..88e6002a5ffa 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java @@ -21,6 +21,7 @@ import alluxio.client.file.cache.LocalCacheFileInStream; import alluxio.client.file.cache.filter.CacheFilter; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; import alluxio.metrics.MetricsConfig; import alluxio.metrics.MetricsSystem; import alluxio.wire.FileInfo; @@ -130,8 +131,15 @@ public FSDataInputStream open(Path path, int bufferSize) throws IOException { .setGroup(externalFileStatus.getGroup()); // FilePath is a unique identifier for a file, however it can be a long string // hence using md5 hash of the file path as the identifier in the cache. 
- CacheContext context = CacheContext.defaults().setCacheIdentifier( - md5().hashString(externalFileStatus.getPath().toString(), UTF_8).toString()); + String cacheIdentifier; + if (mAlluxioConf.getBoolean(PropertyKey.USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME)) { + // include mtime to avoid consistency issues if the file may update + cacheIdentifier = md5().hashString(externalFileStatus.getPath().toString() + + externalFileStatus.getModificationTime(), UTF_8).toString(); + } else { + cacheIdentifier = md5().hashString(externalFileStatus.getPath().toString(), UTF_8).toString(); + } + CacheContext context = CacheContext.defaults().setCacheIdentifier(cacheIdentifier); URIStatus status = new URIStatus(info, context); return open(status, bufferSize); } diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index d5c6a5e58fa7..e52e2c41affd 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -6229,6 +6229,14 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME = + booleanBuilder(Name.USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME) + .setDefaultValue(false) + .setDescription("If this is enabled, client-side cache will include modification time " + + "while calculating the identifier of a file.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_CLIENT_REPORT_VERSION_ENABLED = booleanBuilder(Name.USER_CLIENT_REPORT_VERSION_ENABLED) @@ -8879,6 +8887,8 @@ public static final class Name { "alluxio.user.client.cache.timeout.duration"; public static final String USER_CLIENT_CACHE_TIMEOUT_THREADS = "alluxio.user.client.cache.timeout.threads"; + public static final String USER_CLIENT_CACHE_IDENTIFIER_INCLUDE_MTIME 
= + "alluxio.user.client.cache.include.mtime"; public static final String USER_CLIENT_REPORT_VERSION_ENABLED = "alluxio.user.client.report.version.enabled"; public static final String USER_CONF_CLUSTER_DEFAULT_ENABLED = From 73cea837e09e017c9a7f133547d5ed70ad8f1a39 Mon Sep 17 00:00:00 2001 From: sundy xiong <920073134@qq.com> Date: Mon, 10 Jul 2023 10:33:38 +0800 Subject: [PATCH 300/334] [DOCFIX] Fix doc errors for Tencent Cloud EMR ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Tencent Cloud EMR has updated some version information and needs to be synchronized to community documents ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17747 change-id: cid-73d26a59de4f1023081ed0ffaf91cffcc2f8db0e --- docs/cn/cloud/Tencent-Cloud-EMR.md | 4 ++-- docs/en/cloud/Tencent-Cloud-EMR.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/cn/cloud/Tencent-Cloud-EMR.md b/docs/cn/cloud/Tencent-Cloud-EMR.md index 519e548ef444..007773eec37e 100644 --- a/docs/cn/cloud/Tencent-Cloud-EMR.md +++ b/docs/cn/cloud/Tencent-Cloud-EMR.md @@ -17,8 +17,8 @@ priority: 3 ## 准备 -- 腾讯云EMR的Hadoop标准版本>=2.1.0 -- 腾讯云EMR的Hadoop天穹版本>=1.0 +- 腾讯云EMR的Hadoop标准2.x版本>=EMR-v2.3.0 +- 腾讯云EMR的Hadoop标准3.x版本>=EMR-v3.2.0 - 有关EMR中版本中支持具体的Alluxio的版本支持可参考[这里](https://cloud.tencent.com/document/product/589/20279) ## 创建基于Alluxio的EMR集群 diff --git a/docs/en/cloud/Tencent-Cloud-EMR.md b/docs/en/cloud/Tencent-Cloud-EMR.md index 13fb671849ca..97ef9ca99774 100644 --- a/docs/en/cloud/Tencent-Cloud-EMR.md +++ b/docs/en/cloud/Tencent-Cloud-EMR.md @@ -16,8 +16,8 @@ The out-of-the-box Alluxio service provided on Tencent Cloud EMR can help custom ## Prerequisites -- Hadoop Standard version of Tencent Cloud EMR >= 2.1.0 -- Hadoop TianQiong 
version of Tencent Cloud EMR >= 1.0 +- Hadoop Standard 2.x version of Tencent Cloud EMR >= EMR-v2.3.0 +- Hadoop Standard 3.x version of Tencent Cloud EMR >= EMR-v3.2.0 - For the specific Alluxio version supported in EMR, please refer to [Component Version supported Tencent Cloud EMR](https://intl.cloud.tencent.com/document/product/1026/31095). ## Create EMR cluster based on Alluxio From c237203119575072ce1de354567b7aa209173270 Mon Sep 17 00:00:00 2001 From: GuojingFeng Date: Tue, 11 Jul 2023 10:18:45 +0800 Subject: [PATCH 301/334] [SMALLFIX] Use field property for 'AbstractCmdRunner' ### What changes are proposed in this pull request? Use field property 'mActiveJobs' for 'AbstractCmdRunner' instead of the default value DEFAULT_ACTIVE_JOBS which is hardcoded with 3000. ### Why are the changes needed? Currently, the number of active jobs are limited at 3000 which is not enough when we have lots of loading tasks on huge cluster. We will add command line args to allow user pass the limit of active jobs (set the value of 'mActiveJobs') the in the future. ### Does this PR introduce any user facing changes? 
N/A pr-link: Alluxio/alluxio#17755 change-id: cid-cc87ac5731fac22724ca2d84ca3574f2a09033bb --- .../main/java/alluxio/master/job/tracker/DistLoadCliRunner.java | 2 +- .../main/java/alluxio/master/job/tracker/MigrateCliRunner.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/job/server/src/main/java/alluxio/master/job/tracker/DistLoadCliRunner.java b/job/server/src/main/java/alluxio/master/job/tracker/DistLoadCliRunner.java index 40f3574d72cd..6d21cc5387fb 100644 --- a/job/server/src/main/java/alluxio/master/job/tracker/DistLoadCliRunner.java +++ b/job/server/src/main/java/alluxio/master/job/tracker/DistLoadCliRunner.java @@ -156,7 +156,7 @@ private void submitDistLoad(List pool, int replication, Set workerSet, Set excludedWorkerSet, Set localityIds, Set excludedLocalityIds, boolean directCache, CmdInfo cmdInfo) { - if (mSubmitted.size() >= DEFAULT_ACTIVE_JOBS) { + if (mSubmitted.size() >= mActiveJobs) { waitForCmdJob(); } diff --git a/job/server/src/main/java/alluxio/master/job/tracker/MigrateCliRunner.java b/job/server/src/main/java/alluxio/master/job/tracker/MigrateCliRunner.java index c72d4040ecec..c244409a9492 100644 --- a/job/server/src/main/java/alluxio/master/job/tracker/MigrateCliRunner.java +++ b/job/server/src/main/java/alluxio/master/job/tracker/MigrateCliRunner.java @@ -124,7 +124,7 @@ private void copy(AlluxioURI srcPath, AlluxioURI dstPath, boolean overwrite, int // Submit a child job within a distributed command job. private void submitDistCp(List> pool, boolean overwrite, WriteType writeType, CmdInfo cmdInfo) { - if (mSubmitted.size() >= DEFAULT_ACTIVE_JOBS) { + if (mSubmitted.size() >= mActiveJobs) { waitForCmdJob(); } From 2b83d95e123fa660639933f5f6a62907167726ee Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Thu, 13 Jul 2023 11:54:43 +0800 Subject: [PATCH 302/334] Fix memory leaking when loading data ### What changes are proposed in this pull request? Release buffer when an exception occurs. 
### Why are the changes needed? Fix memory leaking. pr-link: Alluxio/alluxio#17758 change-id: cid-b5e9e655aff744b04d3187687a62dbaed6754186 --- .../src/main/java/alluxio/worker/block/MonoBlockStore.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 5f18e4cac897..1e6e2c32b799 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -347,12 +347,12 @@ public CompletableFuture> load(List blocks, UfsReadOpti blockWriter.close(); } catch (IOException e) { throw AlluxioRuntimeException.from(e); - } finally { - NioDirectBufferPool.release(buf); } }) .thenRun(() -> commitBlock(sessionId, blockId, false)) + .thenRun(() -> NioDirectBufferPool.release(buf)) .exceptionally(t -> { + NioDirectBufferPool.release(buf); handleException(t.getCause(), block, errors, sessionId); return null; }); From a2e5ea53c2f332048ae7f05299cf0500d0446b7b Mon Sep 17 00:00:00 2001 From: alluxio-bot Date: Thu, 13 Jul 2023 10:44:06 -0700 Subject: [PATCH 303/334] Simplify `elect` command Cherry-pick of existing commit. orig-pr: Alluxio/alluxio#17598 orig-commit: Alluxio/alluxio@fd5098ec8c28cde138af2c452695d184512c4aac orig-commit-author: Arthur Jenoudet <23088925+jenoudet@users.noreply.github.com> pr-link: Alluxio/alluxio#17641 change-id: cid-1de7738fcf7f96b1e1b1ca509c04f914f835c447 From c34dd16eec7c7fd7d1c4ce2795903473778cba70 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Mon, 17 Jul 2023 13:56:54 +0800 Subject: [PATCH 304/334] Remove rocks page store log ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. 
If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. webui pr-link: Alluxio/alluxio#17781 change-id: cid-fd757d751c511c9071faf4c97819d8be667dce6b --- .../main/java/alluxio/client/file/cache/store/PageStoreDir.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java b/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java index cacd8aa397ba..0a3378b18588 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java +++ b/core/client/fs/src/main/java/alluxio/client/file/cache/store/PageStoreDir.java @@ -34,7 +34,7 @@ * Directory of page store. */ public interface PageStoreDir { - Logger LOG = LoggerFactory.getLogger(RocksPageStore.class); + Logger LOG = LoggerFactory.getLogger(PageStoreDir.class); /** * Create a list of PageStoreDir based on the configuration. From 1b2868e8700bdc61d7654d3bf2a7d6a904c4f721 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Tue, 18 Jul 2023 02:41:48 +0800 Subject: [PATCH 305/334] Fix to support mount fuse do not report error on macosx ### What changes are proposed in this pull request? 
Fix mount fuse do not report error on macosx pr-link: Alluxio/alluxio#17722 change-id: cid-d528c181cc01abf29e47563c0dcbcbb82a686a61 --- integration/fuse/bin/alluxio-fuse | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/fuse/bin/alluxio-fuse b/integration/fuse/bin/alluxio-fuse index 6be7fec13a24..25f9a6469428 100755 --- a/integration/fuse/bin/alluxio-fuse +++ b/integration/fuse/bin/alluxio-fuse @@ -109,7 +109,7 @@ mount_fuse() { else (nohup ${cmd} > ${ALLUXIO_LOGS_DIR}/fuse.out 2>&1) & # sleep: workaround to let the bg java process exit on errors, if any - sleep ${mount_sleep_seconds}s + sleep ${mount_sleep_seconds} if kill -0 $! > /dev/null 2>&1 ; then echo "Successfully mounted Alluxio to ${mount_point}." echo "See ${ALLUXIO_LOGS_DIR}/fuse.log for logging messages" From cb2d994f84ad9ded72e3e877f0aacabdda4057b2 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Tue, 18 Jul 2023 06:17:59 +0800 Subject: [PATCH 306/334] Close client when heartbeat executor closed ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. ### Why are the changes needed? Please clarify why the changes are needed. For instance, 1. If you propose a new API, clarify the use case for a new API. 2. If you fix a bug, describe the bug. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17785 change-id: cid-657e47ef7657714a572a1f9811bbcc524bb8cfcc --- .../src/main/java/alluxio/master/meta/MetaMasterSync.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java index eb77cc181b5d..87bddb267cf2 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java +++ b/core/server/master/src/main/java/alluxio/master/meta/MetaMasterSync.java @@ -118,5 +118,7 @@ private void setIdAndRegister() throws IOException { } @Override - public void close() {} + public void close() { + mMasterClient.close(); + } } From f3d1af870512c96006aee43de4a99f3475a215f5 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Mon, 24 Jul 2023 12:38:37 +0800 Subject: [PATCH 307/334] Support getFileBlockLocation in LocalCacheFileSystem ### What changes are proposed in this pull request? Delegate `getFileBlockLocation` to external file system in `LocalCacheFileSystem`. ### Why are the changes needed? Otherwise, `LocalCacheFileSystem` inherits the default behavior of `org.apache.hadoop.fs.FileSystem` which returns `localhost` only. ### Does this PR introduce any user facing changes? No. 
pr-link: Alluxio/alluxio#17672 change-id: cid-eb545dbd8ed42001d074fecfb9c8d6b118a559c1 --- .../alluxio/hadoop/LocalCacheFileSystem.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java index 88e6002a5ffa..5b0c74fd6fe7 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/LocalCacheFileSystem.java @@ -27,6 +27,7 @@ import alluxio.wire.FileInfo; import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -213,4 +214,22 @@ public boolean mkdirs(Path f, FsPermission permission) throws IOException { public FileStatus getFileStatus(Path f) throws IOException { return mExternalFileSystem.getFileStatus(f); } + + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + // Applications use the block information here to schedule/distribute the tasks. + // Return the UFS locations directly instead of the local cache location, + // so the application can schedule the tasks accordingly + return mExternalFileSystem.getFileBlockLocations(file, start, len); + } + + @Override + public BlockLocation[] getFileBlockLocations(Path p, long start, long len) + throws IOException { + // Applications use the block information here to schedule/distribute the tasks. 
+ // Return the UFS locations directly instead of the local cache location, + // so the application can schedule the tasks accordingly + return mExternalFileSystem.getFileBlockLocations(p, start, len); + } } From 384ec43b99876707c5488204f27f4c2e39e871c1 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 28 Jul 2023 10:57:45 +0800 Subject: [PATCH 308/334] Support dynamic enable/disable audit log ### What changes are proposed in this pull request? Support dynamic enable/disable audit log ### Why are the changes needed? Now, if we disable auditlog before master startup, we cannot support enable it dynamically, but it can be disable if it is enabled at startup time, it is tricky. And it is not expensive to start an object. image ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#17489 change-id: cid-49454d429d6875157b090b1ca767552743006560 --- .../master/file/DefaultFileSystemMaster.java | 21 +++++++++++-------- .../main/java/alluxio/web/ProxyWebServer.java | 20 +++++++++++------- .../java/alluxio/master/job/JobMaster.java | 19 ++++++++++------- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 38bf0c92a694..75a060fc74c3 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -411,7 +411,7 @@ public class DefaultFileSystemMaster extends CoreMaster private final ActiveSyncManager mSyncManager; /** Log writer for user access audit log. */ - protected AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; + protected volatile AsyncUserAccessAuditLogWriter mAsyncAuditLogWriter; /** Stores the time series for various metrics which are exposed in the UI. 
*/ private final TimeSeriesStore mTimeSeriesStore; @@ -558,6 +558,10 @@ public Type getType() { MetricsSystem.registerCachedGaugeIfAbsent( MetricsSystem.getMetricName(MetricKey.MASTER_METADATA_SYNC_EXECUTOR_QUEUE_SIZE.getName()), () -> mSyncMetadataExecutor.getQueue().size(), 2, TimeUnit.SECONDS); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), + () -> mAsyncAuditLogWriter != null + ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); } private static MountInfo getRootMountInfo(MasterUfsManager ufsManager) { @@ -780,14 +784,6 @@ public void start(Boolean isPrimary) throws IOException { () -> new FixedIntervalSupplier( Configuration.getMs(PropertyKey.MASTER_METRICS_TIME_SERIES_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { - mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); - mAsyncAuditLogWriter.start(); - MetricsSystem.registerGaugeIfAbsent( - MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), - () -> mAsyncAuditLogWriter != null - ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); - } if (Configuration.getBoolean(PropertyKey.UNDERFS_CLEANUP_ENABLED)) { getExecutorService().submit( new HeartbeatThread(HeartbeatContext.MASTER_UFS_CLEANUP, new UfsCleaner(this), @@ -801,6 +797,13 @@ public void start(Boolean isPrimary) throws IOException { mSyncManager.start(); mScheduler.start(); } + /** + * The audit logger will be running all the time, and an operation checks whether + * to enable audit logs in {@link #createAuditContext}. So audit log can be turned on/off + * at runtime by updating the property key. 
+ */ + mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("AUDIT_LOG"); + mAsyncAuditLogWriter.start(); } @Override diff --git a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java index d43c6e24f914..16b8992fc787 100644 --- a/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java +++ b/core/server/proxy/src/main/java/alluxio/web/ProxyWebServer.java @@ -110,14 +110,18 @@ public ProxyWebServer(String serviceName, InetSocketAddress address, (long) Configuration.getInt(PropertyKey.PROXY_S3_GLOBAL_READ_RATE_LIMIT_MB) * Constants.MB; mGlobalRateLimiter = S3RestUtils.createRateLimiter(rate).orElse(null); - if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { - mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("PROXY_AUDIT_LOG"); - mAsyncAuditLogWriter.start(); - MetricsSystem.registerGaugeIfAbsent( - MetricKey.PROXY_AUDIT_LOG_ENTRIES_SIZE.getName(), - () -> mAsyncAuditLogWriter != null - ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); - } + /** + * The audit logger will be running all the time, and an operation checks whether + * to enable audit logs in {@link alluxio.proxy.s3.S3RestServiceHandler#createAuditContext} and + * {@link alluxio.proxy.s3.S3Handler#createAuditContext}. So audit log can be turned on/off + * at runtime by updating the property key. + */ + mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("PROXY_AUDIT_LOG"); + mAsyncAuditLogWriter.start(); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.PROXY_AUDIT_LOG_ENTRIES_SIZE.getName(), + () -> mAsyncAuditLogWriter != null + ? 
mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); ServletContainer servlet = new ServletContainer(config) { private static final long serialVersionUID = 7756010860672831556L; diff --git a/job/server/src/main/java/alluxio/master/job/JobMaster.java b/job/server/src/main/java/alluxio/master/job/JobMaster.java index 8dc21170b26b..7e0de7c731bb 100644 --- a/job/server/src/main/java/alluxio/master/job/JobMaster.java +++ b/job/server/src/main/java/alluxio/master/job/JobMaster.java @@ -259,14 +259,17 @@ public void start(Boolean isLeader) throws IOException { () -> new FixedIntervalSupplier( Configuration.getMs(PropertyKey.JOB_MASTER_LOST_MASTER_INTERVAL)), Configuration.global(), mMasterContext.getUserState())); - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { - mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("JOB_MASTER_AUDIT_LOG"); - mAsyncAuditLogWriter.start(); - MetricsSystem.registerGaugeIfAbsent( - MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), - () -> mAsyncAuditLogWriter != null - ? mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); - } + /** + * The audit logger will be running all the time, and an operation checks whether + * to enable audit logs in {@link #createAuditContext}. So audit log can be turned on/off + * at runtime by updating the property key. + */ + mAsyncAuditLogWriter = new AsyncUserAccessAuditLogWriter("JOB_MASTER_AUDIT_LOG"); + mAsyncAuditLogWriter.start(); + MetricsSystem.registerGaugeIfAbsent( + MetricKey.MASTER_AUDIT_LOG_ENTRIES_SIZE.getName(), + () -> mAsyncAuditLogWriter != null + ? 
mAsyncAuditLogWriter.getAuditLogEntriesSize() : -1); } else { LOG.info("Starting job master as standby"); if (ConfigurationUtils.isHaMode(Configuration.global())) { From c600830f6a892ca934a89481c6cdb5ceed662531 Mon Sep 17 00:00:00 2001 From: David Zhu Date: Mon, 31 Jul 2023 17:26:01 -0700 Subject: [PATCH 309/334] Set min replicate to 0 and unpin file if we fail to replicate a file ### What changes are proposed in this pull request? Now we unpin and set min replication to 0 if we failed due to IO related reasons. (such as UFS not available, file does not exist etc). ### Why are the changes needed? Previously if a replication job fails, it would be rescheduled again and again, causing Denial of Service. ### Does this PR introduce any user facing changes? Yes, the user would expect certain files unpinned or adjusted if it becomes unavailable. pr-link: Alluxio/alluxio#17865 change-id: cid-a2cde6da1e7a95fb361157307bd026303c0164ad --- .../plan/replicate/SetReplicaDefinition.java | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/job/server/src/main/java/alluxio/job/plan/replicate/SetReplicaDefinition.java b/job/server/src/main/java/alluxio/job/plan/replicate/SetReplicaDefinition.java index 819402db7569..8db1e861fffd 100644 --- a/job/server/src/main/java/alluxio/job/plan/replicate/SetReplicaDefinition.java +++ b/job/server/src/main/java/alluxio/job/plan/replicate/SetReplicaDefinition.java @@ -20,6 +20,7 @@ import alluxio.conf.Configuration; import alluxio.exception.status.NotFoundException; import alluxio.grpc.RemoveBlockRequest; +import alluxio.grpc.SetAttributePOptions; import alluxio.job.RunTaskContext; import alluxio.job.SelectExecutorsContext; import alluxio.job.plan.AbstractVoidPlanDefinition; @@ -37,6 +38,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Set; @@ -168,8 +170,25 @@ private void replicate(SetReplicaConfig 
config, RunTaskContext context) throws E // to avoid the the race between "replicate" and "rename", so that even a file to replicate is // renamed, the job is still working on the correct file. URIStatus status = context.getFileSystem().getStatus(new AlluxioURI(config.getPath())); - - JobUtils.loadBlock(status, context.getFsContext(), config.getBlockId(), null, false); + try { + JobUtils.loadBlock(status, context.getFsContext(), config.getBlockId(), null, false); + } catch (IOException e) { + // This will remove the file from the pinlist if it fails to replicate, there can be false + // positives because replication can fail transiently and this would unpin it. However, + // compared to repeatedly replicating, this is a more acceptable result. + LOG.warn("Replication of {} failed, reduce min replication to 0 and unpin. Reason: {} ", + status.getPath(), e.getMessage()); + SetAttributePOptions.Builder optionsBuilder = + SetAttributePOptions.newBuilder(); + try { + context.getFileSystem().setAttribute(new AlluxioURI(config.getPath()), + optionsBuilder.setReplicationMin(0).setPinned(false).build()); + } catch (Throwable e2) { + e.addSuppressed(e2); + LOG.warn("Attempt to set min replication to 0 and unpin failed due to ", e2); + } + throw e; + } LOG.info("Replicated file " + config.getPath() + " block " + config.getBlockId()); } } From bad4a2ae60ec1623eaa46754d027643d1bbb2210 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Tue, 1 Aug 2023 11:08:48 +0800 Subject: [PATCH 310/334] Bump grpc & netty version Cherry-pick of existing commit. 
orig-pr: Alluxio/alluxio#17413 orig-commit: Alluxio/alluxio@16cbf9c0142c20c29b71fb94bd59f049647aac79 orig-commit-author: elega <445092967@qq.com> pr-link: Alluxio/alluxio#17858 change-id: cid-8bfb171d1660c6da96977129b62b6b6640625f4c --- core/common/pom.xml | 8 +++++++- .../main/java/alluxio/grpc/GrpcSerializationUtils.java | 10 +++++++--- pom.xml | 5 +++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/core/common/pom.xml b/core/common/pom.xml index 6e3656543c70..32539cde59d8 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -131,7 +131,7 @@ io.netty netty-tcnative-boringssl-static - 2.0.34.Final + 2.0.56.Final @@ -159,6 +159,12 @@ test-jar test + + org.junit.jupiter + junit-jupiter-api + ${jupiter.version} + test + diff --git a/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java b/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java index afa335154d05..ceb7c08084a9 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcSerializationUtils.java @@ -47,7 +47,7 @@ public class GrpcSerializationUtils { private static final String BUFFER_INPUT_STREAM_CLASS_NAME = "io.grpc.internal.ReadableBuffers$BufferInputStream"; private static final String BUFFER_FIELD_NAME = "buffer"; - private static final String BUFFERS_FIELD_NAME = "buffers"; + private static final String READABLE_BUFFERS_FIELD_NAME = "readableBuffers"; private static final String NETTY_WRITABLE_BUFFER_CLASS_NAME = "io.grpc.netty.NettyWritableBuffer"; private static final String NETTY_READABLE_BUFFER_CLASS_NAME = @@ -79,7 +79,7 @@ public class GrpcSerializationUtils { sBufferList = getPrivateField(BUFFER_CHAIN_OUTPUT_STREAM_CLASS_NAME, BUFFER_LIST_FIELD_NAME); sCurrent = getPrivateField(BUFFER_CHAIN_OUTPUT_STREAM_CLASS_NAME, CURRENT_FIELD_NAME); sCompositeBuffers = - getPrivateField(CompositeReadableBuffer.class.getName(), BUFFERS_FIELD_NAME); + 
getPrivateField(CompositeReadableBuffer.class.getName(), READABLE_BUFFERS_FIELD_NAME); sReadableByteBuf = getPrivateField(NETTY_READABLE_BUFFER_CLASS_NAME, BUFFER_FIELD_NAME); } catch (Exception e) { LOG.warn("Cannot get gRPC output stream buffer, zero copy receive will be disabled.", e); @@ -95,7 +95,7 @@ private static Field getPrivateField(String className, String fieldName) return field; } - private static Constructor getPrivateConstructor(String className, Class ...parameterTypes) + private static Constructor getPrivateConstructor(String className, Class ... parameterTypes) throws ClassNotFoundException, NoSuchMethodException { Class declaringClass = Class.forName(className); Constructor constructor = declaringClass.getDeclaredConstructor(parameterTypes); @@ -146,6 +146,10 @@ public static ByteBuf getByteBufFromReadableBuffer(ReadableBuffer buffer) { } try { if (buffer instanceof CompositeReadableBuffer) { + // TODO(elega) grpc introduced native protobuf zero copy since 1.39.0 + // https://github.com/grpc/grpc-java/pull/8102/files + // replace the following with + // return Unpooled.wrappedBuffer(buffer.getByteBuffer()); Queue buffers = (Queue) sCompositeBuffers.get(buffer); if (buffers.size() == 1) { return getByteBufFromReadableBuffer(buffers.peek()); diff --git a/pom.xml b/pom.xml index ecabdab22388..5e5df8848bd8 100644 --- a/pom.xml +++ b/pom.xml @@ -130,9 +130,9 @@ build 1.2.1 2.3.13 - 1.37.0 + 1.54.1 2.8.9 - 4.1.52.Final + 4.1.87.Final 7.0.3 3.3.1 0.8.5 @@ -141,6 +141,7 @@ 2.34 9.4.46.v20220331 4.13.1 + 5.9.2 2.17.1 3.3.9 4.1.11 From 9a9c80bdbe51dd240ecffeffc259a4aac330c731 Mon Sep 17 00:00:00 2001 From: Kai Date: Fri, 1 Sep 2023 15:12:38 +0800 Subject: [PATCH 311/334] Add hadoop3 jar to the artifacts ### What changes are proposed in this pull request? Add the alluxio hadoop3 jar client to the artifacts ### Why are the changes needed? Some users may want to use hadoop3 ### Does this PR introduce any user facing changes? 
no pr-link: Alluxio/alluxio#18090 change-id: cid-7945e9bbce1480c5fdc7c45dc618649ec2ad5e29 --- .../src/alluxio.org/build-distribution/cmd/generate-tarball.go | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go index 52a82e93088f..c980c2bbb8db 100644 --- a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go +++ b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go @@ -203,6 +203,7 @@ func addAdditionalFiles(srcPath, dstPath string, hadoopVersion version, version "bin/alluxio-workers.sh", "bin/launch-process", fmt.Sprintf("client/build/alluxio-%v-hadoop2-client.jar", version), + fmt.Sprintf("client/build/alluxio-%v-hadoop3-client.jar", version), "conf/rocks-inode-bloom.ini.template", "conf/rocks-block-bloom.ini.template", "conf/rocks-inode.ini.template", From 15b162510f21e97a451c10771d6babb0a32c45e0 Mon Sep 17 00:00:00 2001 From: Bin Fan Date: Tue, 5 Sep 2023 11:26:41 -0700 Subject: [PATCH 312/334] Fix a deadlock in FileSystemContext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes are proposed in this pull request? Fix a deadlock. ### Why are the changes needed? On the Alluxio client, there is a monitor lock of `FileSystemContext` to sync individual client-to-master operations. One application thread can successfully hold the monitor lock (`alluxio.client.file.FileSystemContext`) in order to get worker list, but blocked by another “lock” `FileSystemContextReinitializer` (waiting for latch on on-going RPCs down to zero) for acquiring the block master client to really connect to master process (waiting for other callers involving `FileSystemContextReinitializer` to finish). 
On the other hand, another heartbeat thread "config-hash-master-heartbeat-0" is awaking periodically to sync with the master process to fetch the latest configuration. This thread detected the conf update and thus entered `FileSystemContextReinitializer` (bumping latch) but was blocked by waiting for the monitor lock of `alluxio.client.file.FileSystemContext` in order to get the master address. This PR moves `getMasterAddress` outside `reinit` block to avoid holding the `Reinitializer` object and wait for the monitor object of `FileSystemContext`. ### Does this PR introduce any user facing changes? No pr-link: Alluxio/alluxio#18109 change-id: cid-0fab540f7dbef3661acb44e7472425add163b019 --- .../alluxio/client/file/FileSystemContext.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java index b8e1c1964b67..9843c09b3c53 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java +++ b/core/client/fs/src/main/java/alluxio/client/file/FileSystemContext.java @@ -414,13 +414,16 @@ public ReinitBlockerResource blockReinit() { */ public void reinit(boolean updateClusterConf, boolean updatePathConf) throws UnavailableException, IOException { + // inquiry primary master address before entering the critical session of mReinitializer, + // where all RPCs wait for the monitor object of FileSystemContext (synchronized methods) + // will block until initialization completes + InetSocketAddress masterAddr; + try { + masterAddr = getMasterAddress(); + } catch (IOException e) { + throw new UnavailableException("Failed to get master address during reinitialization", e); + } try (Closeable r = mReinitializer.allow()) { - InetSocketAddress masterAddr; - try { - masterAddr = getMasterAddress(); - } catch (IOException e) { - throw new UnavailableException("Failed to get master address 
during reinitialization", e); - } try { getClientContext().loadConf(masterAddr, updateClusterConf, updatePathConf); } catch (AlluxioStatusException e) { From 27fb4c1e1b8b747822055a622c8693336214f0d6 Mon Sep 17 00:00:00 2001 From: Jason Tieu <6509369+tieujason330@users.noreply.github.com> Date: Wed, 6 Sep 2023 09:30:49 -0700 Subject: [PATCH 313/334] Handle any version string for summarycommandtest ### What changes are proposed in this pull request? ref https://github.com/Alluxio/alluxio/commit/8cbc4908da1985b5b0ebec3bb807ce9bd3764f4e ### Why are the changes needed? Fix potentially failing test for different versions ### Does this PR introduce any user facing changes? no pr-link: Alluxio/alluxio#18091 change-id: cid-3cc882f8e3679a3c60f5046cc07b6e8b0b208bc7 --- .../alluxio/cli/fsadmin/report/SummaryCommandTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java index e500e259bdd9..303fd4add38c 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/SummaryCommandTest.java @@ -193,11 +193,12 @@ private void checkIfOutputValid(String dateFormatPattern, List " Version: testVersion", " Safe Mode: false")); expectedOutput.addAll(HAPattern); + String versionStr = String.format("%-32s", RuntimeConstants.VERSION); expectedOutput.addAll(new ArrayList<>(Arrays.asList( " Master Address State Version ", - " hostname1:10000 Primary 2.10.0-SNAPSHOT ", - " hostname2:10001 Standby 2.10.0-SNAPSHOT ", - " hostname3:10002 Standby 2.10.0-SNAPSHOT ", + " hostname1:10000 Primary " + versionStr, + " hostname2:10001 Standby " + versionStr, + " hostname3:10002 Standby " + versionStr, " Live Workers: 12", " Lost Workers: 4", " Total Capacity: 1309.92KB", From 3f67341f6d85d926a128aeaa07412073d0db356b Mon Sep 17 00:00:00 2001 From: maobaolong 
<307499405@qq.com> Date: Fri, 8 Sep 2023 11:13:58 +0800 Subject: [PATCH 314/334] Provide a way to support listening only for modified PropertyKey Fix #17461 pr-link: Alluxio/alluxio#17474 change-id: cid-6c1c5f50c36a0eac8a35e4f176290c0b7ae3e2fe --- .../java/alluxio/conf/Reconfigurable.java | 8 +++++ .../alluxio/conf/ReconfigurableRegistry.java | 32 +++++++++++++++++-- .../alluxio/heartbeat/HeartbeatThread.java | 9 ++++-- .../alluxio/heartbeat/HeartbeatTimer.java | 12 +------ .../java/alluxio/heartbeat/SleepingTimer.java | 10 +++++- .../master/meta/DefaultMetaMaster.java | 11 ++++--- 6 files changed, 61 insertions(+), 21 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/Reconfigurable.java b/core/common/src/main/java/alluxio/conf/Reconfigurable.java index 19b7dae27309..cbb4f76824d8 100644 --- a/core/common/src/main/java/alluxio/conf/Reconfigurable.java +++ b/core/common/src/main/java/alluxio/conf/Reconfigurable.java @@ -11,6 +11,8 @@ package alluxio.conf; +import java.util.Map; + /** * Reconfigurable listener. */ @@ -18,6 +20,12 @@ public interface Reconfigurable { /** * When the property changed, this function will be invoked. + * @param changedProperties the changed properties + */ + void update(Map changedProperties); + + /** + * When any property changed, this function will be invoked. */ void update(); } diff --git a/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java b/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java index f8c765a3d98a..e4e9492f82c2 100644 --- a/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java +++ b/core/common/src/main/java/alluxio/conf/ReconfigurableRegistry.java @@ -11,13 +11,20 @@ package alluxio.conf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.LinkedList; import java.util.List; +import java.util.Map; +import javax.annotation.concurrent.ThreadSafe; /** * Registry of all reconfigurable listeners. 
*/ +@ThreadSafe public class ReconfigurableRegistry { + private static final Logger LOG = LoggerFactory.getLogger(ReconfigurableRegistry.class); private static final List LISTENER_LIST = new LinkedList<>(); /** @@ -45,12 +52,33 @@ public static synchronized boolean unregister(Reconfigurable listener) { * @return false if no listener related to the given property, otherwise, return false */ public static synchronized boolean update() { - for (Reconfigurable listener : new LinkedList<>(LISTENER_LIST)) { - listener.update(); + for (Reconfigurable listener : LISTENER_LIST) { + try { + listener.update(); + } catch (Throwable t) { + LOG.error("Error while update changed properties for {}", listener, t); + } } return true; } // prevent instantiation private ReconfigurableRegistry() {} + + /** + * When the property was reconfigured, this function will be invoked. + * This property listeners will be notified. + * + * @param changedProperties the changed properties + */ + public static synchronized void update(Map changedProperties) { + for (Reconfigurable listener : LISTENER_LIST) { + try { + listener.update(changedProperties); + } catch (Throwable t) { + LOG.error("Error while update changed properties {} for {}", + changedProperties, listener, t); + } + } + } } diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java index cc9b200bfe5f..b7fc2342c7ca 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatThread.java @@ -12,6 +12,7 @@ package alluxio.heartbeat; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.Reconfigurable; import alluxio.conf.ReconfigurableRegistry; import alluxio.security.authentication.AuthenticatedClientUser; import alluxio.security.user.UserState; @@ -86,7 +87,9 @@ public HeartbeatThread(String executorName, String threadId, HeartbeatExecutor e mConfiguration = conf; 
mUserState = userState; mStatus = Status.INIT; - ReconfigurableRegistry.register(mTimer); + if (mTimer instanceof Reconfigurable) { + ReconfigurableRegistry.register((Reconfigurable) mTimer); + } } /** @@ -157,7 +160,9 @@ public void run() { LOG.error("Uncaught exception in heartbeat executor, Heartbeat Thread shutting down", e); } finally { mStatus = Status.STOPPED; - ReconfigurableRegistry.unregister(mTimer); + if (mTimer instanceof Reconfigurable) { + ReconfigurableRegistry.unregister((Reconfigurable) mTimer); + } mExecutor.close(); } } diff --git a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java index 736037234edd..cd847922ef33 100644 --- a/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/HeartbeatTimer.java @@ -11,20 +11,10 @@ package alluxio.heartbeat; -import alluxio.conf.Reconfigurable; - /** * An interface for heartbeat timers. The {@link HeartbeatThread} calls the {@link #tick()} method. */ -public interface HeartbeatTimer extends Reconfigurable { - - /** - * When this object needs to be reconfigured - * due to external configuration change etc., - * this function will be invoked. - */ - default void update() { - } +public interface HeartbeatTimer { /** * Waits until next heartbeat should be executed. 
diff --git a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java index e1a20b1b9681..0b3bf64cdf0c 100644 --- a/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java +++ b/core/common/src/main/java/alluxio/heartbeat/SleepingTimer.java @@ -11,6 +11,8 @@ package alluxio.heartbeat; +import alluxio.conf.PropertyKey; +import alluxio.conf.Reconfigurable; import alluxio.time.Sleeper; import alluxio.time.SteppingThreadSleeper; @@ -19,6 +21,7 @@ import java.time.Clock; import java.time.Duration; +import java.util.Map; import java.util.Objects; import java.util.function.Supplier; import javax.annotation.concurrent.NotThreadSafe; @@ -27,7 +30,7 @@ * This class can be used for executing heartbeats periodically. */ @NotThreadSafe -public class SleepingTimer implements HeartbeatTimer { +public class SleepingTimer implements HeartbeatTimer, Reconfigurable { protected long mPreviousTickedMs = -1; private final String mThreadName; protected final Logger mLogger; @@ -82,6 +85,11 @@ public long tick() throws InterruptedException { return mIntervalSupplier.getRunLimit(mPreviousTickedMs); } + @Override + public void update(Map changedProperties) { + update(); + } + @Override public void update() { SleepIntervalSupplier newSupplier = mIntervalSupplierSupplier.get(); diff --git a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java index 57ad77dc2e2c..bdd3cc06c70c 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java +++ b/core/server/master/src/main/java/alluxio/master/meta/DefaultMetaMaster.java @@ -725,7 +725,7 @@ public void resetState() { @Override public Map updateConfiguration(Map propertiesMap) { Map result = new HashMap<>(); - int successCount = 0; + Map changedProperties = new HashMap<>(); for (Map.Entry entry : propertiesMap.entrySet()) { try { PropertyKey key 
= PropertyKey.fromString(entry.getKey()); @@ -735,7 +735,7 @@ public Map updateConfiguration(Map propertiesMa Object value = key.parseValue(entry.getValue()); Configuration.set(key, value, Source.RUNTIME); result.put(entry.getKey(), true); - successCount++; + changedProperties.put(key, Configuration.get(key)); LOG.info("Property {} has been updated to \"{}\" from \"{}\"", key.getName(), entry.getValue(), oldValue); } else { @@ -747,9 +747,10 @@ public Map updateConfiguration(Map propertiesMa LOG.error("Failed to update property {} to {}", entry.getKey(), entry.getValue(), e); } } - LOG.debug("Update {} properties, succeed {}.", propertiesMap.size(), successCount); - if (successCount > 0) { - ReconfigurableRegistry.update(); + LOG.debug("Updating {} properties, {} succeed.", propertiesMap.size(), + changedProperties.size()); + if (changedProperties.size() > 0) { + ReconfigurableRegistry.update(changedProperties); } return result; } From e50baf5d0fdfe88c4949536189f5fc93d0e6ab94 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 8 Sep 2023 11:14:53 +0800 Subject: [PATCH 315/334] Support read bytebuffer for non ByteBufferReadable input stream ### What changes are proposed in this pull request? Support read bytebuffer for non ByteBufferReadable input stream ### Why are the changes needed? Without this changes, a non ByteBufferReadable input stream can throw exception when the bytebuffer apis are called. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#17982 change-id: cid-1363dbbeca327cbb7b4d39cea1afbb8c51905483 --- .../hadoop/AlluxioHdfsInputStream.java | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java index 4671e812b21c..86cd96e7d10c 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AlluxioHdfsInputStream.java @@ -14,6 +14,7 @@ import alluxio.client.file.FileInStream; import com.google.common.base.Preconditions; +import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FSDataInputStream; import java.io.IOException; @@ -50,7 +51,30 @@ public int read() throws IOException { @Override public int read(ByteBuffer buf) throws IOException { - return mInput.read(buf); + // @see FSDataInputStream.java + if (mInput.getWrappedStream() instanceof ByteBufferReadable) { + return mInput.read(buf); + } else { + int off = buf.position(); + int len = buf.remaining(); + final int totalBytesRead; + if (buf.hasArray()) { + byte[] byteArray = buf.array(); + totalBytesRead = read(byteArray, buf.arrayOffset() + off, len); + if (totalBytesRead > 0) { + buf.position(off + totalBytesRead); + } + } else { + byte[] byteArray = new byte[len]; + totalBytesRead = read(byteArray); + if (totalBytesRead > 0) { + buf.put(byteArray, 0, totalBytesRead); + } + } + return totalBytesRead; + } } @Override From 6a3dcc1daadec5b7a708bbfcdffaae2568fad902 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Sat, 9 Sep 2023 01:57:55 +0800 Subject: [PATCH 316/334] Retry requiring the buffer when there is insufficient resources ### What changes are proposed in this pull request? Retry requiring a buffer when loading a block throws OutOfMemoryError. ### Why are the changes needed? Failure to require buffer may result in the following exception. 
``` alluxio.exception.BlockAlreadyExistsException: Temp blockId 50331677 is not available, because it already exists ``` pr-link: Alluxio/alluxio#17735 change-id: cid-e87da40ce5c060d7426f95c270fd40539e188e23 --- .../databuffer/NioDirectBufferPool.java | 21 +++++++++++++++++++ .../alluxio/worker/block/MonoBlockStore.java | 9 +++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java b/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java index 8a031fa97048..49d7dd99ab1c 100644 --- a/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java +++ b/core/common/src/main/java/alluxio/network/protocol/databuffer/NioDirectBufferPool.java @@ -11,6 +11,9 @@ package alluxio.network.protocol.databuffer; +import alluxio.exception.runtime.ResourceExhaustedRuntimeException; +import alluxio.retry.RetryPolicy; + import java.nio.ByteBuffer; import java.util.LinkedList; import java.util.Map; @@ -39,6 +42,24 @@ public static synchronized ByteBuffer acquire(int length) { return buffer; } + /** + * @param length + * @param policy the retry policy to use + * @return buffer + */ + public static synchronized ByteBuffer acquire(int length, RetryPolicy policy) { + Error cause = null; + while (policy.attempt()) { + try { + return acquire(length); + } catch (OutOfMemoryError error) { + cause = error; + } + } + throw new ResourceExhaustedRuntimeException("Not enough direct memory allocated to buffer", + cause, false); + } + /** * @param buffer */ diff --git a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java index 1e6e2c32b799..4ef1238dfd31 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/MonoBlockStore.java @@ -331,7 +331,14 @@ public 
CompletableFuture> load(List blocks, UfsReadOpti handleException(e, block, errors, sessionId); continue; } - ByteBuffer buf = NioDirectBufferPool.acquire((int) blockSize); + ByteBuffer buf; + try { + buf = NioDirectBufferPool.acquire((int) blockSize, + new ExponentialBackoffRetry(1000, 5000, 5)); + } catch (Exception e) { + handleException(e, block, errors, sessionId); + continue; + } CompletableFuture future = RetryUtils.retryCallable("read from ufs", () -> manager.read(buf, block.getOffsetInFile(), blockSize, blockId, block.getUfsPath(), options), From f30a2e1754dd6e1d0a21117e658a2ce44b52dd68 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Fri, 15 Sep 2023 03:25:21 +0800 Subject: [PATCH 317/334] Check for more results when the filtered result is empty ### What changes are proposed in this pull request? Check for more results when the filtered result is null when loading data. ### Why are the changes needed? Fix #18043. pr-link: Alluxio/alluxio#18133 change-id: cid-a05d66db230971ba6585b732c7bb2990ba02f7f7 --- .../java/alluxio/master/job/FileIterable.java | 39 ++++++++---- .../master/file/scheduler/LoadJobTest.java | 59 +++++++++++++++++++ .../master/file/scheduler/LoadTestUtils.java | 3 +- 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/job/FileIterable.java b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java index b826c6d2479f..1a5db1b384c2 100644 --- a/core/server/master/src/main/java/alluxio/master/job/FileIterable.java +++ b/core/server/master/src/main/java/alluxio/master/job/FileIterable.java @@ -37,6 +37,7 @@ import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.stream.Collectors; /** @@ -151,28 +152,38 @@ private void partialListFileInfos() { if (!mStartAfter.isEmpty()) { mListOptions.setDisableAreDescendantsLoadedCheck(true); } - ListStatusContext context = 
ListStatusContext.create(ListStatusPartialPOptions - .newBuilder() - .setOptions(mListOptions) - .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) - .setStartAfter(mStartAfter)); - listFileInfos(context); + Supplier context = () -> { + return ListStatusContext.create(ListStatusPartialPOptions + .newBuilder() + .setOptions(mListOptions) + .setBatchSize(PARTIAL_LISTING_BATCH_SIZE) + .setStartAfter(mStartAfter)); + }; + + List fileInfos; + while ((fileInfos = listStatus(context.get())) != null + && (mFiles = fileInfos.stream().filter(mFilter).collect(Collectors.toList())).isEmpty() + && !fileInfos.isEmpty()) { + mStartAfter = fileInfos.get(fileInfos.size() - 1).getPath(); + mListOptions.setDisableAreDescendantsLoadedCheck(true); + } if (mFiles.size() > 0) { mStartAfter = mFiles .get(mFiles.size() - 1) .getPath(); } + updateIterator(); } private void listFileInfos(ListStatusContext context) { + mFiles = listStatus(context).stream().filter(mFilter).collect(Collectors.toList()); + updateIterator(); + } + + private List listStatus(ListStatusContext context) { try { AuthenticatedClientUser.set(mUser.orElse(null)); - mFiles = mFileSystemMaster - .listStatus(new AlluxioURI(mPath), context) - .stream() - .filter(mFilter) - .collect(Collectors.toList()); - mFileInfoIterator = mFiles.iterator(); + return mFileSystemMaster.listStatus(new AlluxioURI(mPath), context); } catch (FileDoesNotExistException | InvalidPathException e) { throw new NotFoundRuntimeException(e); } catch (AccessControlException e) { @@ -182,6 +193,10 @@ private void listFileInfos(ListStatusContext context) { } finally { AuthenticatedClientUser.remove(); } + } + + private void updateIterator() { + mFileInfoIterator = mFiles.iterator(); mTotalFileCount.set(mFiles.size()); mTotalByteCount.set(mFiles .stream() diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java index 4ddf1246e8f2..7b1f6d521adc 100644 
--- a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadJobTest.java @@ -28,6 +28,7 @@ import alluxio.grpc.Block; import alluxio.grpc.JobProgressReportFormat; import alluxio.master.file.FileSystemMaster; +import alluxio.master.file.contexts.ListStatusContext; import alluxio.master.job.FileIterable; import alluxio.master.job.LoadJob; import alluxio.scheduler.job.JobState; @@ -80,6 +81,64 @@ public void testGetNextBatch() assertEquals(0, batch.size()); } + @Test + public void testGetNextBatchWithPartialListing() + throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { + List fileInfos = generateRandomFileInfo(400, 2, 64 * Constants.MB); + + for (int i = 0; i < 100; i++) { + fileInfos.get(i).setInAlluxioPercentage(100); + } + for (int i = 200; i < 300; i++) { + fileInfos.get(i).setInAlluxioPercentage(100); + } + for (int i = 0; i < 10; i++) { + fileInfos.get(300 + i * i).setInAlluxioPercentage(100); + } + + FileSystemMaster fileSystemMaster = mock(FileSystemMaster.class); + when(fileSystemMaster.listStatus(any(), any())).thenAnswer(invocation -> { + ListStatusContext context = invocation.getArgument(1, ListStatusContext.class); + int fileSize = fileInfos.size(); + int from = 0; + int to = fileSize; + if (context.isPartialListing()) { + String startAfter = context.getPartialOptions().get().getStartAfter(); + int batch = context.getPartialOptions().get().getBatchSize(); + for (int i = 0; i < fileSize; i++) { + if (startAfter.equals(fileInfos.get(i).getPath())) { + from = i + 1; + break; + } + } + to = fileSize < from + batch ? 
fileSize : from + batch; + } + return fileInfos.subList(from, to); + }); + String testPath = "test"; + Optional user = Optional.of("user"); + FileIterable files = + new FileIterable(fileSystemMaster, testPath, user, true, + LoadJob.QUALIFIED_FILE_FILTER); + LoadJob load = + new LoadJob(testPath, user, "1", OptionalLong.empty(), true, false, files); + + List batch = load.getNextBatchBlocks(100); + assertEquals(100, batch.size()); + assertEquals(50, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(200); + assertEquals(200, batch.size()); + assertEquals(100, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(300); + assertEquals(80, batch.size()); + assertEquals(40, batch.stream().map(Block::getUfsPath).distinct().count()); + + batch = load.getNextBatchBlocks(100); + assertEquals(0, batch.size()); + } + @Test public void testIsHealthy() throws FileDoesNotExistException, AccessControlException, IOException, InvalidPathException { diff --git a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java index de299ea03d97..77265a9b76dd 100644 --- a/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java +++ b/core/server/master/src/test/java/alluxio/master/file/scheduler/LoadTestUtils.java @@ -89,12 +89,13 @@ private static FileInfo createFileInfo(int blockCount, long blockSizeLimit) { Random random = new Random(); FileInfo info = new FileInfo(); String ufs = CommonUtils.randomAlphaNumString(6); + String filePath = CommonUtils.randomAlphaNumString(6); long blockSize = Math.abs(random.nextLong() % blockSizeLimit); List blockIds = LongStream.range(0, blockCount) .map(i -> random.nextLong()) .boxed() .collect(ImmutableList.toImmutableList()); - info.setUfsPath(ufs) + info.setUfsPath(ufs).setPath(filePath) .setBlockSizeBytes(blockSize) 
.setLength(blockSizeLimit * blockCount) .setBlockIds(blockIds) From a6eef7d9190c5e91816612304bf5ce01ac6ffee2 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 22 Sep 2023 00:18:52 +0800 Subject: [PATCH 318/334] Revert "Bump ratis version to 2.5.1" Fix #17889 pr-link: Alluxio/alluxio#17910 change-id: cid-4fa6c6aff69785d58e35cf1e3d45cafb6ff922ee --- core/server/common/pom.xml | 2 +- .../journal/raft/RaftJournalSystem.java | 111 ++++++++++++++---- .../master/journal/raft/RaftJournalUtils.java | 19 +++ .../master/journal/DefaultJournalMaster.java | 9 ++ .../alluxio/master/journal/JournalMaster.java | 10 +- .../JournalMasterClientServiceHandler.java | 47 +------- .../journal/tool/RaftJournalDumper.java | 5 +- .../master/journal/raft/RaftJournalTest.java | 42 ++++++- .../fsadmin/journal/QuorumElectCommand.java | 24 +++- .../command/QuorumCommandIntegrationTest.java | 8 +- ...dJournalIntegrationTestFaultTolerance.java | 11 +- ...rnalIntegrationTestTransferLeadership.java | 34 +++++- 12 files changed, 234 insertions(+), 88 deletions(-) diff --git a/core/server/common/pom.xml b/core/server/common/pom.xml index db06bedd7966..ec5b82504477 100644 --- a/core/server/common/pom.xml +++ b/core/server/common/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.parent.basedir}/build - 2.5.1 + 2.4.1 diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java index 8040346ceb46..37ecc043c3cc 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalSystem.java @@ -25,6 +25,7 @@ import alluxio.grpc.NodeState; import alluxio.grpc.QuorumServerInfo; import alluxio.grpc.QuorumServerState; +import alluxio.grpc.TransferLeaderMessage; import alluxio.master.Master; import alluxio.master.PrimarySelector; import 
alluxio.master.StateLockManager; @@ -108,6 +109,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -182,6 +184,8 @@ public class RaftJournalSystem extends AbstractJournalSystem { private final File mPath; private final InetSocketAddress mLocalAddress; private final List mClusterAddresses; + /** Controls whether the quorum leadership can be transferred. */ + private final AtomicBoolean mTransferLeaderAllowed = new AtomicBoolean(false); private final Map mRatisMetricsMap = new ConcurrentHashMap<>(); @@ -240,6 +244,7 @@ public class RaftJournalSystem extends AbstractJournalSystem { private final ClientId mRawClientId = ClientId.randomId(); private RaftGroup mRaftGroup; private RaftPeerId mPeerId; + private final Map mErrorMessages = new ConcurrentHashMap<>(); static long nextCallId() { return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE; @@ -545,6 +550,7 @@ public synchronized void gainPrimacy() { mRaftJournalWriter = new RaftJournalWriter(nextSN, client); mAsyncJournalWriter .set(new AsyncJournalWriter(mRaftJournalWriter, () -> getJournalSinks(null))); + mTransferLeaderAllowed.set(true); super.registerMetrics(); LOG.info("Gained primacy."); } @@ -556,6 +562,7 @@ public synchronized void losePrimacy() { // Avoid duplicate shut down Ratis server return; } + mTransferLeaderAllowed.set(false); try { // Close async writer first to flush pending entries. 
mAsyncJournalWriter.get().close(); @@ -978,7 +985,7 @@ public synchronized void removeQuorumServer(NetAddress serverNetAddress) throws */ public synchronized void resetPriorities() throws IOException { List resetPeers = new ArrayList<>(); - final int NEUTRAL_PRIORITY = 0; + final int NEUTRAL_PRIORITY = 1; for (RaftPeer peer : mRaftGroup.getPeers()) { resetPeers.add( RaftPeer.newBuilder(peer) @@ -989,7 +996,7 @@ public synchronized void resetPriorities() throws IOException { LOG.info("Resetting RaftPeer priorities"); try (RaftClient client = createClient()) { RaftClientReply reply = client.admin().setConfiguration(resetPeers); - processReply(reply, "failed to reset master priorities to 0"); + processReply(reply, "failed to reset master priorities to 1"); } } @@ -997,32 +1004,81 @@ public synchronized void resetPriorities() throws IOException { * Transfers the leadership of the quorum to another server. * * @param newLeaderNetAddress the address of the server - * @return error message if an error occurs or empty string if no error occurred + * @return the guid of transfer leader command */ public synchronized String transferLeadership(NetAddress newLeaderNetAddress) { - InetSocketAddress serverAddress = InetSocketAddress - .createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort()); - Collection peers = mRaftGroup.getPeers(); - // The NetUtil function is used by Ratis to convert InetSocketAddress to string - String strAddr = NetUtils.address2String(serverAddress); - // if you cannot find the address in the quorum, return error message. 
- if (peers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) { - return String.format("<%s> is not part of the quorum <%s>.", - strAddr, peers.stream().map(RaftPeer::getAddress).collect(Collectors.toList())); + final boolean allowed = mTransferLeaderAllowed.getAndSet(false); + String transferId = UUID.randomUUID().toString(); + if (!allowed) { + String msg = "transfer is not allowed at the moment because the master is " + + (mRaftJournalWriter == null ? "still gaining primacy" : "already transferring the ") + + "leadership"; + mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder().setMsg(msg).build()); + return transferId; } + try { + InetSocketAddress serverAddress = InetSocketAddress + .createUnresolved(newLeaderNetAddress.getHost(), newLeaderNetAddress.getRpcPort()); + List oldPeers = new ArrayList<>(mRaftGroup.getPeers()); + // The NetUtil function is used by Ratis to convert InetSocketAddress to string + String strAddr = NetUtils.address2String(serverAddress); + // if you cannot find the address in the quorum, throw exception. 
+ if (oldPeers.stream().map(RaftPeer::getAddress).noneMatch(addr -> addr.equals(strAddr))) { + throw new IOException(String.format("<%s> is not part of the quorum <%s>.", + strAddr, oldPeers.stream().map(RaftPeer::getAddress).collect(Collectors.toList()))); + } + if (strAddr.equals(mRaftGroup.getPeer(mPeerId).getAddress())) { + throw new IOException(String.format("%s is already the leader", strAddr)); + } - RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress); - /* transfer leadership */ - LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", - serverAddress, newLeaderPeerId); - try (RaftClient client = createClient()) { - RaftClientReply reply1 = client.admin().transferLeadership(newLeaderPeerId, 30_000); - processReply(reply1, "election failed"); + RaftPeerId newLeaderPeerId = RaftJournalUtils.getPeerId(serverAddress); + /* update priorities to enable transfer */ + List peersWithNewPriorities = new ArrayList<>(); + for (RaftPeer peer : oldPeers) { + peersWithNewPriorities.add( + RaftPeer.newBuilder(peer) + .setPriority(peer.getId().equals(newLeaderPeerId) ? 2 : 1) + .build() + ); + } + try (RaftClient client = createClient()) { + String stringPeers = "[" + peersWithNewPriorities.stream().map(RaftPeer::toString) + .collect(Collectors.joining(", ")) + "]"; + LOG.info("Applying new peer state before transferring leadership: {}", stringPeers); + RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriorities); + processReply(reply, "failed to set master priorities before initiating election"); + } + /* transfer leadership */ + LOG.info("Transferring leadership to master with address <{}> and with RaftPeerId <{}>", + serverAddress, newLeaderPeerId); + // fire and forget: need to immediately return as the master will shut down its RPC servers + // once the TransferLeadershipRequest is initiated. 
+ final int SLEEP_TIME_MS = 3_000; + final int TRANSFER_LEADER_WAIT_MS = 30_000; + new Thread(() -> { + try (RaftClient client = createClient()) { + Thread.sleep(SLEEP_TIME_MS); + RaftClientReply reply1 = client.admin().transferLeadership(newLeaderPeerId, + TRANSFER_LEADER_WAIT_MS); + processReply(reply1, "election failed"); + } catch (Throwable t) { + LOG.error("caught an error when executing transfer: {}", t.getMessage()); + // we only allow transfers again if the transfer is unsuccessful: a success means it + // will soon lose primacy + mTransferLeaderAllowed.set(true); + mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder() + .setMsg(t.getMessage()).build()); + /* checking the transfer happens in {@link QuorumElectCommand} */ + } + }).start(); + LOG.info("Transferring leadership initiated"); } catch (Throwable t) { + mTransferLeaderAllowed.set(true); LOG.warn(t.getMessage()); - return t.getMessage(); + mErrorMessages.put(transferId, TransferLeaderMessage.newBuilder() + .setMsg(t.getMessage()).build()); } - return ""; + return transferId; } /** @@ -1039,6 +1095,19 @@ private void processReply(RaftClientReply reply, String msgToUser) throws IOExce } } + /** + * Gets exception message throwing when transfer leader. + * @param transferId the guid of transferLeader command + * @return the exception + */ + public synchronized TransferLeaderMessage getTransferLeaderMessage(String transferId) { + if (mErrorMessages.get(transferId) != null) { + return mErrorMessages.get(transferId); + } else { + return TransferLeaderMessage.newBuilder().setMsg("").build(); + } + } + /** * Adds a server to the quorum. 
* diff --git a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java index 6665702eb699..ae46016840f4 100644 --- a/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java +++ b/core/server/common/src/main/java/alluxio/master/journal/raft/RaftJournalUtils.java @@ -12,8 +12,10 @@ package alluxio.master.journal.raft; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import java.io.File; +import java.io.IOException; import java.net.InetSocketAddress; import java.util.concurrent.CompletableFuture; @@ -58,6 +60,23 @@ public static File getRaftJournalDir(File baseDir) { return new File(baseDir, RAFT_DIR); } + /** + * Creates a temporary snapshot file. + * + * @param storage the snapshot storage + * @return the temporary snapshot file + * @throws IOException if error occurred while creating the snapshot file + */ + public static File createTempSnapshotFile(SimpleStateMachineStorage storage) throws IOException { + File tempDir = new File(storage.getSmDir().getParentFile(), "tmp"); + if (!tempDir.isDirectory() && !tempDir.mkdir()) { + throw new IOException( + "Cannot create temporary snapshot directory at " + tempDir.getAbsolutePath()); + } + return File.createTempFile("raft_snapshot_" + System.currentTimeMillis() + "_", + ".dat", tempDir); + } + /** * Creates a future that is completed exceptionally. 
* diff --git a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java index 6ef347aab38f..eae8d452c37f 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/DefaultJournalMaster.java @@ -15,6 +15,7 @@ import alluxio.clock.SystemClock; import alluxio.grpc.GetNodeStatePResponse; import alluxio.grpc.GetQuorumInfoPResponse; +import alluxio.grpc.GetTransferLeaderMessagePResponse; import alluxio.grpc.GrpcService; import alluxio.grpc.JournalDomain; import alluxio.grpc.NetAddress; @@ -89,6 +90,14 @@ public void resetPriorities() throws IOException { ((RaftJournalSystem) mJournalSystem).resetPriorities(); } + @Override + public GetTransferLeaderMessagePResponse getTransferLeaderMessage(String transferId) { + checkQuorumOpSupported(); + return GetTransferLeaderMessagePResponse.newBuilder() + .setTransMsg(((RaftJournalSystem) mJournalSystem).getTransferLeaderMessage(transferId)) + .build(); + } + @Override public GetNodeStatePResponse getNodeState() { return GetNodeStatePResponse.newBuilder() diff --git a/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java b/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java index 4ae643af9cc8..a3eb7d19659b 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java +++ b/core/server/master/src/main/java/alluxio/master/journal/JournalMaster.java @@ -13,6 +13,7 @@ import alluxio.grpc.GetNodeStatePResponse; import alluxio.grpc.GetQuorumInfoPResponse; +import alluxio.grpc.GetTransferLeaderMessagePResponse; import alluxio.grpc.NetAddress; import alluxio.master.Master; @@ -45,7 +46,7 @@ public interface JournalMaster extends Master { * {@link alluxio.master.journal.JournalType#EMBEDDED} journal. 
* * @param newLeaderAddress server address to remove from quorum - * @return an error message if an error occurred, otherwise empty string + * @return the guid of transfer leader command */ String transferLeadership(NetAddress newLeaderAddress); @@ -56,6 +57,13 @@ public interface JournalMaster extends Master { */ void resetPriorities() throws IOException; + /** + * Gets exception messages thrown when transferring the leader. + * @param transferId the guid of transferLeader command + * @return exception message + */ + GetTransferLeaderMessagePResponse getTransferLeaderMessage(String transferId); + /** * Gets the node state. This endpoint is available for both UFS and embedded journals. * If HA mode is turn off, the node state will always be returned as PRIMARY. diff --git a/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java b/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java index 5ea74a1b9a45..37da2fcf39d8 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/journal/JournalMasterClientServiceHandler.java @@ -23,19 +23,13 @@ import alluxio.grpc.RemoveQuorumServerPResponse; import alluxio.grpc.ResetPrioritiesPRequest; import alluxio.grpc.ResetPrioritiesPResponse; -import alluxio.grpc.TransferLeaderMessage; import alluxio.grpc.TransferLeadershipPRequest; import alluxio.grpc.TransferLeadershipPResponse; -import io.grpc.StatusException; import io.grpc.stub.StreamObserver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Map; -import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; - /** * This class is a gRPC handler for journal master RPCs invoked by an Alluxio client. 
*/ @@ -44,8 +38,6 @@ public class JournalMasterClientServiceHandler private static final Logger LOG = LoggerFactory.getLogger(JournalMasterClientServiceHandler.class); - private final Map mTransferLeaderMessages = new ConcurrentHashMap<>(); - private final JournalMaster mJournalMaster; /** @@ -76,34 +68,10 @@ public void removeQuorumServer(RemoveQuorumServerPRequest request, @Override public void transferLeadership(TransferLeadershipPRequest request, StreamObserver responseObserver) { - try { - // using RpcUtils wrapper for metrics tracking - RpcUtils.callAndReturn(LOG, () -> { - String transferId = UUID.randomUUID().toString(); - // atomically reserve UUID in map with empty message: if not in use (which is good), it - // will return null - while (mTransferLeaderMessages.putIfAbsent(transferId, "") != null) { - transferId = UUID.randomUUID().toString(); - } - String message; - try { - // return transfer id to caller before initiating transfer of leadership. this is because - // the leader will close its gRPC server when being demoted - responseObserver.onNext( - TransferLeadershipPResponse.newBuilder().setTransferId(transferId).build()); - responseObserver.onCompleted(); - // initiate transfer after replying with transfer ID - message = mJournalMaster.transferLeadership(request.getServerAddress()); - } catch (Throwable t) { - message = t.getMessage(); - } - mTransferLeaderMessages.put(transferId, message); - return null; - }, "transferLeadership", false, "request=%s", request); - } catch (StatusException e) { - // throws only if above callable throws, which it does not - LOG.warn("error thrown in transferLeadership rpc, should not be possible", e); - } + RpcUtils.call(LOG, () -> { + String transferId = mJournalMaster.transferLeadership(request.getServerAddress()); + return TransferLeadershipPResponse.newBuilder().setTransferId(transferId).build(); + }, "transferLeadership", "request=%s", responseObserver, request); } @Override @@ -118,11 +86,8 @@ public void 
resetPriorities(ResetPrioritiesPRequest request, @Override public void getTransferLeaderMessage(GetTransferLeaderMessagePRequest request, StreamObserver responseObserver) { - RpcUtils.call(LOG, () -> GetTransferLeaderMessagePResponse.newBuilder() - .setTransMsg(TransferLeaderMessage.newBuilder() - .setMsg(mTransferLeaderMessages.getOrDefault(request.getTransferId(), ""))) - .build(), - "GetTransferLeaderMessage", "request=%s", responseObserver, request); + RpcUtils.call(LOG, () -> mJournalMaster.getTransferLeaderMessage(request.getTransferId()), + "GetTransferLeaderMessage", "request=%s", responseObserver, request); } @Override diff --git a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java index 780020c23034..974f5ac7d305 100644 --- a/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java +++ b/core/server/master/src/main/java/alluxio/master/journal/tool/RaftJournalDumper.java @@ -30,7 +30,6 @@ import org.apache.ratis.statemachine.SnapshotInfo; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.util.MD5FileUtil; -import org.apache.ratis.util.SizeInBytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -96,8 +95,8 @@ private void readRatisLogFromDir() { List paths = LogSegmentPath.getLogSegmentPaths(storage); for (LogSegmentPath path : paths) { final int entryCount = LogSegment.readSegmentFile(path.getPath().toFile(), - path.getStartEnd(), SizeInBytes.valueOf(Integer.MAX_VALUE), - RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, null, (proto) -> { + path.getStartEnd(), RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, + null, (proto) -> { if (proto.hasStateMachineLogEntry()) { try { Journal.JournalEntry entry = Journal.JournalEntry.parseFrom( diff --git a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java 
b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java index 4871b97218ac..d51b1bfb0d6a 100644 --- a/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java +++ b/core/server/master/src/test/java/alluxio/master/journal/raft/RaftJournalTest.java @@ -13,7 +13,6 @@ import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; -import alluxio.grpc.NetAddress; import alluxio.grpc.QuorumServerInfo; import alluxio.master.NoopMaster; import alluxio.master.StateLockManager; @@ -25,7 +24,9 @@ import alluxio.util.CommonUtils; import alluxio.util.WaitForOptions; +import com.google.common.annotations.VisibleForTesting; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.junit.After; import org.junit.Assert; @@ -35,6 +36,7 @@ import org.junit.rules.TemporaryFolder; import org.junit.rules.Timeout; +import java.lang.reflect.Method; import java.net.InetSocketAddress; import java.net.ServerSocket; import java.util.ArrayList; @@ -398,11 +400,8 @@ private void promoteFollower() throws Exception { Assert.assertTrue(mLeaderJournalSystem.isLeader()); Assert.assertFalse(mFollowerJournalSystem.isLeader()); // Triggering rigged election via reflection to switch the leader. 
- NetAddress followerAddress = - mLeaderJournalSystem.getQuorumServerInfoList().stream() - .filter(info -> !info.getIsLeader()).findFirst() - .map(QuorumServerInfo::getServerAddress).get(); - mLeaderJournalSystem.transferLeadership(followerAddress); + changeToFollower(mLeaderJournalSystem); + changeToCandidate(mFollowerJournalSystem); CommonUtils.waitFor("follower becomes leader", () -> mFollowerJournalSystem.isLeader(), mWaitOptions); Assert.assertFalse(mLeaderJournalSystem.isLeader()); @@ -581,6 +580,37 @@ private List startJournalCluster(List jour return journalSystems; } + @VisibleForTesting + void changeToCandidate(RaftJournalSystem journalSystem) throws Exception { + RaftServer.Division serverImpl = journalSystem.getRaftServer() + .getDivision(RaftJournalSystem.RAFT_GROUP_ID); + Class raftServerImpl = (Class.forName("org.apache.ratis.server.impl.RaftServerImpl")); + Method method = raftServerImpl.getDeclaredMethod("changeToCandidate", boolean.class); + method.setAccessible(true); + method.invoke(serverImpl, true); + } + + @VisibleForTesting + void changeToFollower(RaftJournalSystem journalSystem) throws Exception { + RaftServer.Division serverImplObj = journalSystem.getRaftServer() + .getDivision(RaftJournalSystem.RAFT_GROUP_ID); + Class raftServerImplClass = Class.forName("org.apache.ratis.server.impl.RaftServerImpl"); + + Method getStateMethod = raftServerImplClass.getDeclaredMethod("getState"); + getStateMethod.setAccessible(true); + Object serverStateObj = getStateMethod.invoke(serverImplObj); + Class serverStateClass = Class.forName("org.apache.ratis.server.impl.ServerState"); + Method getCurrentTermMethod = serverStateClass.getDeclaredMethod("getCurrentTerm"); + getCurrentTermMethod.setAccessible(true); + long currentTermObj = (long) getCurrentTermMethod.invoke(serverStateObj); + + Method changeToFollowerMethod = raftServerImplClass.getDeclaredMethod("changeToFollower", + long.class, boolean.class, boolean.class, Object.class); + + 
changeToFollowerMethod.setAccessible(true); + changeToFollowerMethod.invoke(serverImplObj, currentTermObj, true, false, "test"); + } + /** * @return a list of free ports */ diff --git a/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java index 59c82552170d..7c8e13981734 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/journal/QuorumElectCommand.java @@ -41,6 +41,10 @@ public class QuorumElectCommand extends AbstractFsAdminCommand { public static final String TRANSFER_INIT = "Initiating transfer of leadership to %s"; public static final String TRANSFER_SUCCESS = "Successfully elected %s as the new leader"; public static final String TRANSFER_FAILED = "Failed to elect %s as the new leader: %s"; + public static final String RESET_INIT = "Resetting priorities of masters after %s transfer of " + + "leadership"; + public static final String RESET_SUCCESS = "Quorum priorities were reset to 1"; + public static final String RESET_FAILED = "Quorum priorities failed to be reset: %s"; /** * @param context fsadmin command context @@ -63,6 +67,7 @@ public int run(CommandLine cl) throws IOException { JournalMasterClient jmClient = mMasterJournalMasterClient; String serverAddress = cl.getOptionValue(ADDRESS_OPTION_NAME); NetAddress address = QuorumCommand.stringToAddress(serverAddress); + boolean success = false; try { mPrintStream.println(String.format(TRANSFER_INIT, serverAddress)); String transferId = jmClient.transferLeadership(address); @@ -79,8 +84,9 @@ public int run(CommandLine cl) throws IOException { GetQuorumInfoPResponse quorumInfo = jmClient.getQuorumInfo(); Optional leadingMasterInfoOpt = quorumInfo.getServerInfoList().stream() .filter(QuorumServerInfo::getIsLeader).findFirst(); - return leadingMasterInfoOpt.isPresent() - && address.equals(leadingMasterInfoOpt.get().getServerAddress()); + 
NetAddress leaderAddress = leadingMasterInfoOpt.isPresent() + ? leadingMasterInfoOpt.get().getServerAddress() : null; + return address.equals(leaderAddress); } catch (IOException e) { return false; } @@ -90,11 +96,21 @@ public int run(CommandLine cl) throws IOException { throw new Exception(errorMessage.get()); } mPrintStream.println(String.format(TRANSFER_SUCCESS, serverAddress)); + success = true; } catch (Exception e) { mPrintStream.println(String.format(TRANSFER_FAILED, serverAddress, e.getMessage())); - return -1; } - return 0; + // reset priorities regardless of transfer success + try { + mPrintStream.println(String.format(RESET_INIT, success ? "successful" : "failed")); + jmClient.resetPriorities(); + mPrintStream.println(RESET_SUCCESS); + } catch (IOException e) { + mPrintStream.println(String.format(RESET_FAILED, e)); + success = false; + } + + return success ? 0 : -1; } @Override diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java index 191903638cf2..11268b15d6f1 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/QuorumCommandIntegrationTest.java @@ -208,9 +208,11 @@ public void elect() throws Exception { mOutput.reset(); shell.run("journal", "quorum", "elect", "-address" , newLeaderAddr); String output = mOutput.toString().trim(); - String expected = String.format("%s\n%s", + String expected = String.format("%s\n%s\n%s\n%s", String.format(QuorumElectCommand.TRANSFER_INIT, newLeaderAddr), - String.format(QuorumElectCommand.TRANSFER_SUCCESS, newLeaderAddr)); + String.format(QuorumElectCommand.TRANSFER_SUCCESS, newLeaderAddr), + String.format(QuorumElectCommand.RESET_INIT, "successful"), + QuorumElectCommand.RESET_SUCCESS); Assert.assertEquals(expected, output); } mCluster.notifySuccess(); @@ -244,7 +246,7 @@ 
public void infoAfterElect() throws Exception { shell.run("journal", "quorum", "info", "-domain", "MASTER"); String output = mOutput.toString().trim(); for (MasterNetAddress masterAddr : mCluster.getMasterAddresses()) { - String expected = String.format(QuorumInfoCommand.OUTPUT_SERVER_INFO, "AVAILABLE", "0", + String expected = String.format(QuorumInfoCommand.OUTPUT_SERVER_INFO, "AVAILABLE", "1", String.format("%s:%d", masterAddr.getHostname(), masterAddr.getEmbeddedJournalPort())); Assert.assertTrue(output.contains(expected.trim())); } diff --git a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java index 3fa21cd85745..86c5c369a834 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestFaultTolerance.java @@ -39,7 +39,8 @@ import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.StorageImplUtils; -import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -175,9 +176,9 @@ public void copySnapshotToMaster() throws Exception { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); rs.initialize(); - SnapshotDirStateMachineStorage storage = new SnapshotDirStateMachineStorage(); + SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); storage.init(rs); - SnapshotInfo snapshot = storage.getLatestSnapshot(); + SingleFileSnapshotInfo snapshot = storage.findLatestSnapshot(); assertNotNull(snapshot); 
mCluster.notifySuccess(); } @@ -222,9 +223,9 @@ public void copySnapshotToFollower() throws Exception { RaftStorage.StartupOption.RECOVER, RaftServerConfigKeys.STORAGE_FREE_SPACE_MIN_DEFAULT.getSize()); rs.initialize(); - SnapshotDirStateMachineStorage storage = new SnapshotDirStateMachineStorage(); + SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); storage.init(rs); - SnapshotInfo snapshot = storage.getLatestSnapshot(); + SingleFileSnapshotInfo snapshot = storage.findLatestSnapshot(); assertNotNull(snapshot); mCluster.notifySuccess(); } diff --git a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java index 8d84e5674ff5..7b6eba4732b2 100644 --- a/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java +++ b/tests/src/test/java/alluxio/server/ft/journal/raft/EmbeddedJournalIntegrationTestTransferLeadership.java @@ -77,6 +77,34 @@ public void repeatedTransferLeadership() throws Exception { mCluster.notifySuccess(); } + @Test + public void transferWhenAlreadyTransferring() throws Exception { + mCluster = MultiProcessCluster + .newBuilder(PortCoordination.EMBEDDED_JOURNAL_ALREADY_TRANSFERRING) + .setClusterName("EmbeddedJournalTransferLeadership_transferWhenAlreadyTransferring") + .setNumMasters(NUM_MASTERS) + .setNumWorkers(NUM_WORKERS) + .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.EMBEDDED) + .addProperty(PropertyKey.MASTER_JOURNAL_FLUSH_TIMEOUT_MS, "5min") + .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MIN_ELECTION_TIMEOUT, "750ms") + .addProperty(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT, "1500ms") + .build(); + mCluster.start(); + + int newLeaderIdx = (mCluster.getPrimaryMasterIndex(MASTER_INDEX_WAIT_TIME) + 1) % NUM_MASTERS; + // `getPrimaryMasterIndex` uses the same `mMasterAddresses` variable as 
getMasterAddresses + // we can therefore access to the new leader's address this way + MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); + NetAddress netAddress = masterEBJAddr2NetAddr(newLeaderAddr); + mCluster.getJournalMasterClientForMaster().transferLeadership(netAddress); + // this second call should throw an exception + String transferId = mCluster.getJournalMasterClientForMaster().transferLeadership(netAddress); + String exceptionMessage = mCluster.getJournalMasterClientForMaster() + .getTransferLeaderMessage(transferId).getTransMsg().getMsg(); + Assert.assertFalse(exceptionMessage.isEmpty()); + mCluster.notifySuccess(); + } + @Test public void transferLeadershipOutsideCluster() throws Exception { mCluster = MultiProcessCluster.newBuilder(PortCoordination.EMBEDDED_JOURNAL_OUTSIDE_CLUSTER) @@ -178,11 +206,11 @@ public void resetPriorities() throws Exception { MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); transferAndWait(newLeaderAddr); match = mCluster.getJournalMasterClientForMaster().getQuorumInfo().getServerInfoList() - .stream().allMatch(info -> info.getPriority() == 0); + .stream().allMatch(info -> info.getPriority() == (info.getIsLeader() ? 
2 : 1)); Assert.assertTrue(match); mCluster.getJournalMasterClientForMaster().resetPriorities(); match = mCluster.getJournalMasterClientForMaster().getQuorumInfo().getServerInfoList() - .stream().allMatch(info -> info.getPriority() == 0); + .stream().allMatch(info -> info.getPriority() == 1); Assert.assertTrue(match); } mCluster.notifySuccess(); @@ -206,7 +234,7 @@ public void transferToSelfThenToOther() throws Exception { String transferId = transferAndWait(leaderAddr); GetTransferLeaderMessagePResponse transferLeaderMessage = mCluster.getJournalMasterClientForMaster().getTransferLeaderMessage(transferId); - Assert.assertTrue(transferLeaderMessage.getTransMsg().getMsg().isEmpty()); + Assert.assertFalse(transferLeaderMessage.getTransMsg().getMsg().isEmpty()); int newLeaderIdx = (leaderIdx + 1) % NUM_MASTERS; MasterNetAddress newLeaderAddr = mCluster.getMasterAddresses().get(newLeaderIdx); From 75ff844945d5678aade070cab2a86c5a255e7128 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sat, 23 Sep 2023 17:18:44 +0800 Subject: [PATCH 319/334] Support display all log files on web UI ### What changes are proposed in this pull request? Before: Web UI displays only files ending with ".log" in name. After: Web UI displays logs files like `.log.XXX`, which are rolled over log files. Also the displayed files include `.out` and captured `txt` and `json` files like metrics and jstack. ### Why are the changes needed? Better observability for admin. ### Does this PR introduce any user facing changes? Please list the user-facing changes introduced by your change, including 1. change in user-facing APIs 2. addition or removal of property keys 3. 
webui pr-link: Alluxio/alluxio#17983 change-id: cid-714bcde45116ed84016d6292e365389e39da1b90 --- .../src/main/java/alluxio/Constants.java | 5 ++ .../meta/AlluxioMasterRestServiceHandler.java | 3 +- .../AlluxioMasterRestServiceHandlerTest.java | 62 +++++++++++++++++++ .../AlluxioWorkerRestServiceHandler.java | 3 +- 4 files changed, 71 insertions(+), 2 deletions(-) diff --git a/core/common/src/main/java/alluxio/Constants.java b/core/common/src/main/java/alluxio/Constants.java index 80ca29f23721..129578ea2bdc 100644 --- a/core/common/src/main/java/alluxio/Constants.java +++ b/core/common/src/main/java/alluxio/Constants.java @@ -11,6 +11,7 @@ package alluxio; +import java.util.regex.Pattern; import javax.annotation.concurrent.ThreadSafe; /** @@ -232,5 +233,9 @@ public final class Constants { public static final String MEDIUM_HDD = "HDD"; public static final String MEDIUM_SSD = "SSD"; + // Log file pattern + public static final Pattern LOG_FILE_PATTERN = + Pattern.compile(".*(\\.log|\\.out)(\\.[0-9-]+)?$|.*.txt|.*.json"); + private Constants() {} // prevent instantiation } diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index 1a74ded14dce..f09279100267 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -689,7 +689,8 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques @DefaultValue("") @QueryParam("end") String requestEnd, @DefaultValue("20") @QueryParam("limit") String requestLimit) { return RestUtils.call(() -> { - FilenameFilter filenameFilter = (dir, name) -> name.toLowerCase().endsWith(".log"); + FilenameFilter filenameFilter = (dir, name) -> + Constants.LOG_FILE_PATTERN.matcher(name.toLowerCase()).matches(); MasterWebUILogs response = new 
MasterWebUILogs(); if (!Configuration.getBoolean(PropertyKey.WEB_FILE_INFO_ENABLED)) { diff --git a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java index ecde1ccea1b7..61a184cc2072 100644 --- a/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java +++ b/core/server/master/src/test/java/alluxio/master/meta/AlluxioMasterRestServiceHandlerTest.java @@ -24,6 +24,7 @@ import alluxio.AlluxioURI; import alluxio.ConfigurationRule; import alluxio.Constants; +import alluxio.DefaultStorageTierAssoc; import alluxio.RuntimeConstants; import alluxio.conf.Configuration; import alluxio.conf.PropertyKey; @@ -48,9 +49,11 @@ import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemFactory; import alluxio.underfs.UnderFileSystemFactoryRegistry; +import alluxio.util.webui.UIFileInfo; import alluxio.web.MasterWebServer; import alluxio.wire.AlluxioMasterInfo; import alluxio.wire.Capacity; +import alluxio.wire.MasterWebUILogs; import alluxio.wire.MountPointInfo; import alluxio.wire.WorkerInfo; import alluxio.wire.WorkerNetAddress; @@ -60,6 +63,7 @@ import com.codahale.metrics.MetricSet; import com.google.common.collect.ImmutableMap; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -70,6 +74,7 @@ import org.powermock.core.classloader.annotations.PrepareForTest; import org.powermock.modules.junit4.PowerMockRunner; +import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.util.Arrays; @@ -299,4 +304,61 @@ public void isMounted() { assertFalse(handler.isMounted(hdfsUri)); assertFalse(handler.isMounted(MetricsSystem.escape(new AlluxioURI(hdfsUri)))); } + + @Test + public void testGetWebUILogsByRegex() throws IOException { + File logsDir = mTestFolder.newFolder("logs"); + 
logsDir.mkdirs(); + String[] wantedFiles = new String[] { + "master.log", + "master.log.1", + "master.log.100", + "master.out", + "master.out.1", + "master.out.100", + "master.txt", + "master.gc.log", + "master.gc.log.2023-09-15-14", + "alluxio-master-exit-metrics-20230526-085548.json" + }; + Arrays.sort(wantedFiles); + String[] unwantedFiles = new String[] { + "master.log.a", + "master.loga", + "master.bin", + }; + + for (String fileName : wantedFiles) { + File file0 = new File(logsDir, fileName); + file0.createNewFile(); + } + for (String fileName : unwantedFiles) { + File file0 = new File(logsDir, fileName); + file0.createNewFile(); + } + + Configuration.set(PropertyKey.LOGS_DIR, logsDir.getPath()); + FileSystemMaster mockMaster = mock(FileSystemMaster.class); + BlockMaster mockBlockMaster = mock(BlockMaster.class); + + AlluxioMasterProcess masterProcess = PowerMockito.mock(AlluxioMasterProcess.class); + when(masterProcess.getMaster(FileSystemMaster.class)).thenReturn(mockMaster); + when(masterProcess.getMaster(BlockMaster.class)).thenReturn(mockBlockMaster); + when(mockBlockMaster.getGlobalStorageTierAssoc()).thenReturn( + new DefaultStorageTierAssoc( + PropertyKey.MASTER_TIERED_STORE_GLOBAL_LEVELS, + PropertyKey.Template.MASTER_TIERED_STORE_GLOBAL_LEVEL_ALIAS)); + + ServletContext context = mock(ServletContext.class); + when(context.getAttribute(MasterWebServer.ALLUXIO_MASTER_SERVLET_RESOURCE_KEY)).thenReturn( + masterProcess); + AlluxioMasterRestServiceHandler handler = new AlluxioMasterRestServiceHandler(context); + Response response = handler.getWebUILogs("", "0", "", "20"); + Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus()); + List fileInfos = ((MasterWebUILogs) response.getEntity()).getFileInfos(); + String[] actualFileNameArray = + fileInfos.stream().map(fileInfo -> fileInfo.getName()).toArray(String[]::new); + Arrays.sort(actualFileNameArray); + Assert.assertArrayEquals(wantedFiles, actualFileNameArray); + } } diff --git 
a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java index 405314eeb84e..69e367b21fc6 100644 --- a/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java +++ b/core/server/worker/src/main/java/alluxio/worker/AlluxioWorkerRestServiceHandler.java @@ -464,7 +464,8 @@ public Response getWebUILogs(@DefaultValue("") @QueryParam("path") String reques @QueryParam("end") String requestEnd, @DefaultValue("20") @QueryParam("limit") String requestLimit) { return RestUtils.call(() -> { - FilenameFilter filenameFilter = (dir, name) -> name.toLowerCase().endsWith(".log"); + FilenameFilter filenameFilter = (dir, name) -> + Constants.LOG_FILE_PATTERN.matcher(name.toLowerCase()).matches(); WorkerWebUILogs response = new WorkerWebUILogs(); if (!Configuration.getBoolean(PropertyKey.WEB_FILE_INFO_ENABLED)) { From 957049e106fe79abb2aedb6e97b64a1d495d20b9 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Sat, 23 Sep 2023 17:41:44 +0800 Subject: [PATCH 320/334] Support unmount and remount in one atomic operation ### What changes are proposed in this pull request? Support unmount and mount to another mount point within the same lock ### Why are the changes needed? Support replace ufs for a mount point. The operation is atomic so no user operations should fail due to observing the middle state. ### Does this PR introduce any user facing changes? A new rpc field and cmd options are added. 
pr-link: Alluxio/alluxio#17984 change-id: cid-d96b4df7d24ab1082e46e6d5a72563534714a48f --- .../master/file/DefaultFileSystemMaster.java | 5 +++++ .../src/main/proto/grpc/file_system_master.proto | 1 + core/transport/src/main/proto/proto.lock | 5 +++++ .../java/alluxio/cli/fs/command/MountCommand.java | 14 ++++++++++++-- 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java index 75a060fc74c3..ae5a931b77db 100644 --- a/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java +++ b/core/server/master/src/main/java/alluxio/master/file/DefaultFileSystemMaster.java @@ -3608,6 +3608,11 @@ public void mount(AlluxioURI alluxioPath, AlluxioURI ufsPath, MountContext conte } mMountTable.checkUnderWritableMountPoint(alluxioPath); + if (context.getOptions().getRemount()) { + LOG.info("Mount {} with remount options, so it will be unmounted first.", + inodePath.getUri()); + unmountInternal(rpcContext, inodePath); + } mountInternal(rpcContext, inodePath, ufsPath, context); auditContext.setSucceeded(true); Metrics.PATHS_MOUNTED.inc(); diff --git a/core/transport/src/main/proto/grpc/file_system_master.proto b/core/transport/src/main/proto/grpc/file_system_master.proto index 35db38e889e9..b0a9c56ce2a5 100644 --- a/core/transport/src/main/proto/grpc/file_system_master.proto +++ b/core/transport/src/main/proto/grpc/file_system_master.proto @@ -411,6 +411,7 @@ message MountPOptions { map properties = 2; optional bool shared = 3; optional FileSystemMasterCommonPOptions commonOptions = 4; + optional bool remount = 5; } message MountPRequest { /** the path of alluxio mount point */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 8dfbdb55129a..9b1a537bcb60 100644 --- a/core/transport/src/main/proto/proto.lock +++ 
b/core/transport/src/main/proto/proto.lock @@ -3276,6 +3276,11 @@ "id": 4, "name": "commonOptions", "type": "FileSystemMasterCommonPOptions" + }, + { + "id": 5, + "name": "remount", + "type": "bool" } ], "maps": [ diff --git a/shell/src/main/java/alluxio/cli/fs/command/MountCommand.java b/shell/src/main/java/alluxio/cli/fs/command/MountCommand.java index 443520e021c7..c7080f478d5b 100644 --- a/shell/src/main/java/alluxio/cli/fs/command/MountCommand.java +++ b/shell/src/main/java/alluxio/cli/fs/command/MountCommand.java @@ -51,6 +51,13 @@ public final class MountCommand extends AbstractFileSystemCommand { .hasArg(false) .desc("mount point is shared") .build(); + private static final Option REMOUNT_OPTION = + Option.builder() + .longOpt("remount") + .required(false) + .hasArg(false) + .desc("unmount and remount in one locked operation, with atomicity") + .build(); private static final Option OPTION_OPTION = Option.builder() .longOpt("option") @@ -77,7 +84,7 @@ public String getCommandName() { @Override public Options getOptions() { return new Options().addOption(READONLY_OPTION).addOption(SHARED_OPTION) - .addOption(OPTION_OPTION); + .addOption(REMOUNT_OPTION).addOption(OPTION_OPTION); } @Override @@ -98,6 +105,9 @@ public int run(CommandLine cl) throws AlluxioException, IOException { if (cl.hasOption(SHARED_OPTION.getLongOpt())) { optionsBuilder.setShared(true); } + if (cl.hasOption(REMOUNT_OPTION.getLongOpt())) { + optionsBuilder.setRemount(true); + } if (cl.hasOption(OPTION_OPTION.getLongOpt())) { Properties properties = cl.getOptionProperties(OPTION_OPTION.getLongOpt()); optionsBuilder.putAllProperties(Maps.fromProperties(properties)); @@ -109,7 +119,7 @@ public int run(CommandLine cl) throws AlluxioException, IOException { @Override public String getUsage() { - return "mount [--readonly] [--shared] [--option ] "; + return "mount [--readonly] [--shared] [--remount] [--option ] "; } @Override From fc003ed8fd1861f79b93a5d1b3c8aa47c9e49e04 Mon Sep 17 00:00:00 2001 
From: qian0817 Date: Mon, 25 Sep 2023 09:04:46 +0800 Subject: [PATCH 321/334] Add worker rejected cache block request metrics ### What changes are proposed in this pull request? Add cache rejected metrics. ### Why are the changes needed? Add cache rejected metrics. ### Does this PR introduce any user facing changes? Add metrics `Worker_CacheRejectedBlocks`. pr-link: Alluxio/alluxio#18103 change-id: cid-b92ed474cadbc7c52091e51c1431cd1c95ff448b --- core/common/src/main/java/alluxio/metrics/MetricKey.java | 6 ++++++ .../main/java/alluxio/worker/block/CacheRequestManager.java | 3 +++ 2 files changed, 9 insertions(+) diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index dcdd08877873..100c914ca94e 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -2119,6 +2119,12 @@ public static String getSyncMetricName(long mountId) { .setMetricType(MetricType.COUNTER) .setIsClusterAggregated(false) .build(); + public static final MetricKey WORKER_CACHE_REJECTED_BLOCKS = + new Builder("Worker.CacheRejectedBlocks") + .setDescription("Total number of rejected cache block requests on the worker") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(false) + .build(); public static final MetricKey WORKER_CACHE_UFS_BLOCKS = new Builder("Worker.CacheUfsBlocks") .setDescription("Total number of blocks that need to be cached from local source") diff --git a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java index 464feabc2356..0c2bb7ebc9bc 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/CacheRequestManager.java @@ -124,6 +124,7 @@ public void submitRequest(CacheRequest request) // gRPC thread pool is drained 
due to highly concurrent caching workloads. In these cases, // return as async caching is at best effort. mNumRejected.incrementAndGet(); + CACHE_REJECTED_BLOCKS.inc(); SAMPLING_LOG.warn(String.format( "Failed to cache block locally as the thread pool is at capacity." + " To increase, update the parameter '%s'. numRejected: {} error: {}", @@ -351,6 +352,8 @@ public RemoteBlockReader getRemoteBlockReader(long blockId, long blockSize, MetricsSystem.counter(MetricKey.WORKER_CACHE_REMOTE_BLOCKS.getName()); private static final Counter CACHE_SUCCEEDED_BLOCKS = MetricsSystem.counter(MetricKey.WORKER_CACHE_SUCCEEDED_BLOCKS.getName()); + private static final Counter CACHE_REJECTED_BLOCKS = + MetricsSystem.counter(MetricKey.WORKER_CACHE_REJECTED_BLOCKS.getName()); private static final Counter CACHE_UFS_BLOCKS = MetricsSystem.counter(MetricKey.WORKER_CACHE_UFS_BLOCKS.getName()); private static final Counter CACHE_BLOCKS_SIZE = From 263b96f41e0b2bf776b5ce16a631542b0b6478d1 Mon Sep 17 00:00:00 2001 From: Rico Chiu Date: Fri, 29 Sep 2023 10:32:20 -0700 Subject: [PATCH 322/334] Add worker vCPU info to fsadmin report capacity command provide more information about worker state pr-link: Alluxio/alluxio#18185 change-id: cid-b972079ff380e4ac32fa2e013b3034931f94786a --- .../block/options/GetWorkerReportOptions.java | 3 +- .../src/main/java/alluxio/grpc/GrpcUtils.java | 6 ++- .../main/java/alluxio/wire/WorkerInfo.java | 26 ++++++++-- .../java/alluxio/wire/WorkerInfoTest.java | 2 + .../master/block/DefaultBlockMaster.java | 2 + .../master/block/meta/MasterWorkerInfo.java | 32 +++++++++++- .../worker/block/BlockMasterClient.java | 7 ++- .../worker/block/RegisterStreamer.java | 5 +- .../src/main/proto/grpc/block_master.proto | 3 ++ core/transport/src/main/proto/proto.lock | 24 +++++++++ .../command/CollectAlluxioInfoCommand.java | 2 + .../cli/fsadmin/report/CapacityCommand.java | 7 ++- .../fsadmin/report/CapacityCommandTest.java | 4 ++ .../CapacityCommandIntegrationTest.java | 50 
++++++++++--------- 14 files changed, 137 insertions(+), 36 deletions(-) diff --git a/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java b/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java index 12098cd96d10..3e63692dff85 100644 --- a/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java +++ b/core/client/fs/src/main/java/alluxio/client/block/options/GetWorkerReportOptions.java @@ -196,7 +196,8 @@ public enum WorkerInfoField { WORKER_USED_BYTES, WORKER_USED_BYTES_ON_TIERS, BLOCK_COUNT, - BUILD_VERSION; + BUILD_VERSION, + NUM_VCPU; public static final Set ALL = EnumSet.allOf(WorkerInfoField.class); diff --git a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java index 281411186c69..8a8ffbdf0c3f 100644 --- a/core/common/src/main/java/alluxio/grpc/GrpcUtils.java +++ b/core/common/src/main/java/alluxio/grpc/GrpcUtils.java @@ -349,7 +349,8 @@ public static WorkerInfo fromProto(alluxio.grpc.WorkerInfo workerInfo) { .setUsedBytes(workerInfo.getUsedBytes()) .setUsedBytesOnTiers(workerInfo.getUsedBytesOnTiersMap()) .setVersion(workerInfo.getBuildVersion().getVersion()) - .setRevision(workerInfo.getBuildVersion().getRevision()); + .setRevision(workerInfo.getBuildVersion().getRevision()) + .setNumVCpu(workerInfo.getNumVCpu()); } /** @@ -629,7 +630,8 @@ public static alluxio.grpc.WorkerInfo toProto(WorkerInfo workerInfo) { .putAllCapacityBytesOnTiers(workerInfo.getCapacityBytesOnTiers()) .putAllUsedBytesOnTiers(workerInfo.getUsedBytesOnTiers()) .setBuildVersion(BuildVersion.newBuilder().setVersion(workerInfo.getVersion()) - .setRevision(workerInfo.getRevision())) + .setRevision(workerInfo.getRevision())) + .setNumVCpu(workerInfo.getNumVCpu()) .build(); } diff --git a/core/common/src/main/java/alluxio/wire/WorkerInfo.java b/core/common/src/main/java/alluxio/wire/WorkerInfo.java index 14eb64af247d..42872ade9ebe 100644 
--- a/core/common/src/main/java/alluxio/wire/WorkerInfo.java +++ b/core/common/src/main/java/alluxio/wire/WorkerInfo.java @@ -41,6 +41,7 @@ public final class WorkerInfo implements Serializable { private long mBlockCount; private String mVersion = ""; private String mRevision = ""; + private int mNumVCpu; /** * @return the worker id @@ -130,6 +131,14 @@ public String getRevision() { return mRevision; } + /** + * @return the number of available processors on the worker + */ + @ApiModelProperty(value = "Number of available processors on the worker") + public int getNumVCpu() { + return mNumVCpu; + } + /** * @param id the worker id to use * @return the worker information @@ -231,6 +240,15 @@ public WorkerInfo setRevision(String revision) { return this; } + /** + * @param numVCpu the number of available processors on the worker + * @return the worker information + */ + public WorkerInfo setNumVCpu(int numVCpu) { + mNumVCpu = numVCpu; + return this; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -246,7 +264,8 @@ public boolean equals(Object o) { && mStartTimeMs == that.mStartTimeMs && Objects.equal(mCapacityBytesOnTiers, that.mCapacityBytesOnTiers) && Objects.equal(mUsedBytesOnTiers, that.mUsedBytesOnTiers) - && mVersion.equals(that.mVersion) && mRevision.equals(that.mRevision); + && mVersion.equals(that.mVersion) && mRevision.equals(that.mRevision) + && mNumVCpu == that.mNumVCpu; } /** @@ -287,7 +306,7 @@ public LastContactSecComparator() {} @Override public int hashCode() { return Objects.hashCode(mId, mAddress, mLastContactSec, mState, mCapacityBytes, mUsedBytes, - mStartTimeMs, mCapacityBytesOnTiers, mUsedBytesOnTiers, mVersion, mRevision); + mStartTimeMs, mCapacityBytesOnTiers, mUsedBytesOnTiers, mVersion, mRevision, mNumVCpu); } @Override @@ -297,6 +316,7 @@ public String toString() { .add("capacityBytes", mCapacityBytes).add("usedBytes", mUsedBytes) .add("startTimeMs", mStartTimeMs).add("capacityBytesOnTiers", mCapacityBytesOnTiers) 
.add("usedBytesOnTiers", mUsedBytesOnTiers) - .add("version", mVersion).add("revision", mRevision).toString(); + .add("version", mVersion).add("revision", mRevision) + .add("numVCpu", mNumVCpu).toString(); } } diff --git a/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java b/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java index 00cf71e5390a..88d2a00d927e 100644 --- a/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java +++ b/core/common/src/test/java/alluxio/wire/WorkerInfoTest.java @@ -92,6 +92,7 @@ public static WorkerInfo createRandom() { String version = String.format("%d.%d.%d", random.nextInt(10), random.nextInt(20), random.nextInt(10)); String revision = DigestUtils.sha1Hex(RandomStringUtils.random(10)); + int numVCpu = random.nextInt(128); result.setId(id); result.setAddress(address); @@ -104,6 +105,7 @@ public static WorkerInfo createRandom() { result.setUsedBytesOnTiers(usedBytesOnTiers); result.setVersion(version); result.setRevision(revision); + result.setNumVCpu(numVCpu); return result; } } diff --git a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java index d501526ea8b8..6100141fbd46 100644 --- a/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java +++ b/core/server/master/src/main/java/alluxio/master/block/DefaultBlockMaster.java @@ -1398,6 +1398,7 @@ public void workerRegister(long workerId, List storageTiers, } worker.setBuildVersion(options.getBuildVersion()); + worker.setNumVCpu(options.getNumVCpu()); // Gather all blocks on this worker. 
int totalSize = currentBlocksOnLocation.values().stream().mapToInt(List::size).sum(); @@ -1537,6 +1538,7 @@ protected void workerRegisterStart(WorkerRegisterContext context, processWorkerOrphanedBlocks(workerInfo); workerInfo.addLostStorage(lostStorage); workerInfo.setBuildVersion(options.getBuildVersion()); + workerInfo.setNumVCpu(options.getNumVCpu()); // TODO(jiacheng): This block can be moved to a non-locked section if (options.getConfigsCount() > 0) { diff --git a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java index d4dae4783b1d..ea5204075226 100644 --- a/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java +++ b/core/server/master/src/main/java/alluxio/master/block/meta/MasterWorkerInfo.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; @@ -122,12 +123,15 @@ public final class MasterWorkerInfo { EnumSet.of(WorkerInfoField.WORKER_CAPACITY_BYTES, WorkerInfoField.WORKER_CAPACITY_BYTES_ON_TIERS, WorkerInfoField.WORKER_USED_BYTES, - WorkerInfoField.WORKER_USED_BYTES_ON_TIERS); + WorkerInfoField.WORKER_USED_BYTES_ON_TIERS, + WorkerInfoField.NUM_VCPU); /** Worker's last updated time in ms. */ private final AtomicLong mLastUpdatedTimeMs; /** Worker's build version (including version and revision). */ private final AtomicReference mBuildVersion; + /** Worker's number of available processors. */ + private final AtomicInteger mNumVCpu; /** Worker metadata, this field is thread safe. 
*/ private final StaticWorkerMeta mMeta; @@ -168,6 +172,7 @@ public MasterWorkerInfo(long id, WorkerNetAddress address) { mToRemoveBlocks = new LongOpenHashSet(); mLastUpdatedTimeMs = new AtomicLong(CommonUtils.getCurrentMs()); mBuildVersion = new AtomicReference<>(BuildVersion.getDefaultInstance()); + mNumVCpu = new AtomicInteger(); // Init all locks mStatusLock = new StampedLock().asReadWriteLock(); @@ -343,6 +348,9 @@ public WorkerInfo generateWorkerInfo(Set fieldRange, WorkerStat info.setVersion(v.getVersion()); info.setRevision(v.getRevision()); break; + case NUM_VCPU: + info.setNumVCpu(mNumVCpu.get()); + break; default: LOG.warn("Unrecognized worker info field: " + field); } @@ -537,7 +545,9 @@ public String toString() { .add("blocks", LOG.isDebugEnabled() ? mBlocks : CommonUtils.summarizeCollection(mBlocks)) .add("lostStorage", mUsage.mLostStorage) .add("version", buildVersion.getVersion()) - .add("revision", buildVersion.getRevision()).toString(); + .add("revision", buildVersion.getRevision()) + .add("numVCpu", mNumVCpu) + .toString(); } /** @@ -725,4 +735,22 @@ public void setBuildVersion(BuildVersion buildVersion) { public BuildVersion getBuildVersion() { return mBuildVersion.get(); } + + /** + * Sets the number of available processors of the worker. + * + * @param numVCpu the number of available processors + */ + public void setNumVCpu(int numVCpu) { + mNumVCpu.set(numVCpu); + } + + /** + * Get the number of available processors on the worker. 
+ * + * @return the number of available processors + */ + public int getNumVCpu() { + return mNumVCpu.get(); + } } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java index 177bd19ac8a4..657051b3f4c0 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterClient.java @@ -310,7 +310,9 @@ public void register(final long workerId, final List storageTierAliases, final RegisterWorkerPOptions options = RegisterWorkerPOptions.newBuilder().addAllConfigs(configList) - .setBuildVersion(buildVersion).build(); + .setBuildVersion(buildVersion) + .setNumVCpu(Runtime.getRuntime().availableProcessors()) + .build(); final List currentBlocks = convertBlockListMapToProto(currentBlocksOnLocation); @@ -324,7 +326,8 @@ public void register(final long workerId, final List storageTierAliases, .putAllUsedBytesOnTiers(usedBytesOnTiers) .addAllCurrentBlocks(currentBlocks) .putAllLostStorage(lostStorageMap) - .setOptions(options).build(); + .setOptions(options) + .build(); retryRPC(() -> { mClient.registerWorker(request); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java index cdeaea5250f9..33bf971e9730 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/RegisterStreamer.java @@ -183,7 +183,10 @@ private RegisterStreamer( mUsedBytesOnTiers = usedBytesOnTiers; mOptions = RegisterWorkerPOptions.newBuilder().addAllConfigs(configList) - .setBuildVersion(buildVersion).build(); + .setBuildVersion(buildVersion) + .setNumVCpu(Runtime.getRuntime().availableProcessors()) + .build(); + mLostStorageMap = lostStorage.entrySet().stream() 
.collect(Collectors.toMap(Map.Entry::getKey, e -> StorageList.newBuilder().addAllStorage(e.getValue()).build())); diff --git a/core/transport/src/main/proto/grpc/block_master.proto b/core/transport/src/main/proto/grpc/block_master.proto index f6aed214f010..a49801092b76 100644 --- a/core/transport/src/main/proto/grpc/block_master.proto +++ b/core/transport/src/main/proto/grpc/block_master.proto @@ -69,6 +69,7 @@ message WorkerInfo { map capacityBytesOnTiers = 8; map usedBytesOnTiers = 9; optional BuildVersion buildVersion = 10; + optional int32 numVCpu = 11; } enum WorkerRange { @@ -91,6 +92,7 @@ enum WorkerInfoField { WORKER_USED_BYTES_ON_TIERS = 9; BLOCK_COUNT = 10; BUILD_VERSION = 11; + NUM_VCPU = 12; } message GetWorkerReportPOptions { @@ -290,6 +292,7 @@ message RegisterWorkerPOptions { repeated grpc.ConfigProperty configs = 1; /** the worker version to display in info pages (useful for rolling upgrades) */ optional BuildVersion buildVersion = 2; + optional int32 numVCpu = 3; } message RegisterWorkerPRequest { /** the id of the worker */ diff --git a/core/transport/src/main/proto/proto.lock b/core/transport/src/main/proto/proto.lock index 9b1a537bcb60..1966d9f90bd7 100644 --- a/core/transport/src/main/proto/proto.lock +++ b/core/transport/src/main/proto/proto.lock @@ -112,6 +112,10 @@ { "name": "BUILD_VERSION", "integer": 11 + }, + { + "name": "NUM_VCPU", + "integer": 12 } ] } @@ -287,6 +291,11 @@ "id": 10, "name": "buildVersion", "type": "BuildVersion" + }, + { + "id": 11, + "name": "numVCpu", + "type": "int32" } ], "maps": [ @@ -733,6 +742,11 @@ "id": 2, "name": "buildVersion", "type": "BuildVersion" + }, + { + "id": 3, + "name": "numVCpu", + "type": "int32" } ] }, @@ -6315,6 +6329,16 @@ "id": 4, "name": "pathConfigHash", "type": "string" + }, + { + "id": 5, + "name": "clusterConfigLastUpdateTime", + "type": "int64" + }, + { + "id": 6, + "name": "pathConfigLastUpdateTime", + "type": "int64" } ], "maps": [ diff --git 
a/shell/src/main/java/alluxio/cli/bundler/command/CollectAlluxioInfoCommand.java b/shell/src/main/java/alluxio/cli/bundler/command/CollectAlluxioInfoCommand.java index dc4f4772a84d..b7672ab64f56 100644 --- a/shell/src/main/java/alluxio/cli/bundler/command/CollectAlluxioInfoCommand.java +++ b/shell/src/main/java/alluxio/cli/bundler/command/CollectAlluxioInfoCommand.java @@ -68,6 +68,8 @@ protected void registerCommands() { new AlluxioCommand(mAlluxioPath, "getConf --master --source"), null); registerCommand("fsadmin", new AlluxioCommand(mAlluxioPath, "fsadmin report"), null); + registerCommand("fsadmin", + new AlluxioCommand(mAlluxioPath, "fsadmin report capacity"), null); registerCommand("mount", new AlluxioCommand(mAlluxioPath, "fs mount"), null); registerCommand("version", diff --git a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java index 908e705406e9..2944e2e61bda 100644 --- a/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java +++ b/shell/src/main/java/alluxio/cli/fsadmin/report/CapacityCommand.java @@ -58,6 +58,7 @@ public class CapacityCommand { private int mIndentationLevel = 0; private long mSumCapacityBytes; private long mSumUsedBytes; + private int mSumNumVCpus; private Map mSumCapacityBytesOnTierMap; private Map mSumUsedBytesOnTierMap; private TreeMap> mCapacityTierInfoMap; @@ -141,8 +142,10 @@ private void collectWorkerInfo(List workerInfoList) { for (WorkerInfo workerInfo : workerInfoList) { long usedBytes = workerInfo.getUsedBytes(); long capacityBytes = workerInfo.getCapacityBytes(); + int vCpu = workerInfo.getNumVCpu(); mSumCapacityBytes += capacityBytes; mSumUsedBytes += usedBytes; + mSumNumVCpus += vCpu; String workerName = workerInfo.getAddress().getHost(); @@ -183,6 +186,7 @@ private void printAggregatedInfo(GetWorkerReportOptions options) { options.getWorkerRange().toString().toLowerCase())); mIndentationLevel++; + print("Total vCPUs: " + 
mSumNumVCpus); print("Total Capacity: " + FormatUtils.getSizeFromBytes(mSumCapacityBytes)); mIndentationLevel++; for (Map.Entry totalBytesTier : mSumCapacityBytesOnTierMap.entrySet()) { @@ -405,7 +409,7 @@ private GetWorkerReportOptions getOptions(CommandLine cl) throws IOException { WorkerInfoField.WORKER_CAPACITY_BYTES, WorkerInfoField.WORKER_CAPACITY_BYTES_ON_TIERS, WorkerInfoField.LAST_CONTACT_SEC, WorkerInfoField.WORKER_USED_BYTES, WorkerInfoField.WORKER_USED_BYTES_ON_TIERS, WorkerInfoField.BUILD_VERSION, - WorkerInfoField.ID, WorkerInfoField.STATE); + WorkerInfoField.ID, WorkerInfoField.STATE, WorkerInfoField.NUM_VCPU); workerOptions.setFieldRange(fieldRange); if (cl.hasOption(ReportCommand.LIVE_OPTION_NAME)) { @@ -444,6 +448,7 @@ private static String getWorkerFormattedTierValues(Map(FileSystemAdminShellUtils::compareTierNames); mSumUsedBytesOnTierMap = new TreeMap<>(FileSystemAdminShellUtils::compareTierNames); diff --git a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java index d4aa63800ffe..275c0a8521b6 100644 --- a/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java +++ b/shell/src/test/java/alluxio/cli/fsadmin/report/CapacityCommandTest.java @@ -59,6 +59,7 @@ public void longCapacity() throws IOException { String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable List expectedOutput = Arrays.asList("Capacity information for all workers: ", + " Total vCPUs: 2", " Total Capacity: 29.80GB", " Tier: MEM Size: 8.38GB", " Tier: SSD Size: 4768.37MB", @@ -101,6 +102,7 @@ public void shortCapacity() throws IOException { String output = new String(outputStream.toByteArray(), StandardCharsets.UTF_8); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable List expectedOutput = Arrays.asList("Capacity information for all workers: ", + " Total vCPUs: 0", " Total Capacity: 
14.90GB", " Tier: RAM Size: 14.90GB", " Used Capacity: 5.12GB", @@ -135,6 +137,7 @@ public void longWorkerNameCapacity() throws IOException { List testRst = Arrays.asList(output.split("\n")); // CHECKSTYLE.OFF: LineLengthExceed - Much more readable List expectedOutput = Arrays.asList("Capacity information for all workers: ", + " Total vCPUs: 0", " Total Capacity: 3051.76MB", " Tier: MEM Size: 1144.41MB", " Tier: SSD Size: 572.20MB", @@ -178,6 +181,7 @@ private List prepareLongInfoList() { .setCapacityBytesOnTiers(capacityBytesOnTiersOne) .setId(1) .setLastContactSec(3123) + .setNumVCpu(2) .setStartTimeMs(1331231121212L) .setState(WorkerState.LIVE.toString()) .setUsedBytes(10000000000L) diff --git a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java index 5014d3814d12..76dfd23094f0 100644 --- a/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/cli/fsadmin/command/CapacityCommandIntegrationTest.java @@ -29,20 +29,21 @@ public void allCapacity() { String output = mOutput.toString(); String size = FormatUtils.getSizeFromBytes(SIZE_BYTES); String[] lines = output.split("\n"); - Assert.assertEquals(11, lines.length); + Assert.assertEquals(12, lines.length); Assert.assertEquals("Capacity information for all workers: ", lines[0]); - Assert.assertEquals(" Total Capacity: " + size, lines[1]); - Assert.assertEquals(" Tier: MEM Size: " + size, lines[2]); - Assert.assertEquals(" Used Capacity: 0B", lines[3]); - Assert.assertEquals(" Tier: MEM Size: 0B", lines[4]); - Assert.assertEquals(" Used Percentage: 0%", lines[5]); - Assert.assertEquals(" Free Percentage: 100%", lines[6]); - Assert.assertEquals("", lines[7]); - Assert.assertTrue(lines[8].matches( + Assert.assertTrue(lines[1].startsWith(" Total vCPUs:")); // value depends on environment + Assert.assertEquals(" 
Total Capacity: " + size, lines[2]); + Assert.assertEquals(" Tier: MEM Size: " + size, lines[3]); + Assert.assertEquals(" Used Capacity: 0B", lines[4]); + Assert.assertEquals(" Tier: MEM Size: 0B", lines[5]); + Assert.assertEquals(" Used Percentage: 0%", lines[6]); + Assert.assertEquals(" Free Percentage: 100%", lines[7]); + Assert.assertEquals("", lines[8]); + Assert.assertTrue(lines[9].matches( "Worker Name {6,}State {11,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); - Assert.assertTrue(lines[9].contains("ACTIVE")); - Assert.assertTrue(lines[9].contains("capacity " + size)); - Assert.assertTrue(lines[10].contains("used 0B (0%)")); + Assert.assertTrue(lines[10].contains("ACTIVE")); + Assert.assertTrue(lines[10].contains("capacity " + size)); + Assert.assertTrue(lines[11].contains("used 0B (0%)")); } @Test @@ -59,20 +60,21 @@ public void liveCapacity() { String output = mOutput.toString(); String size = FormatUtils.getSizeFromBytes(SIZE_BYTES); String[] lines = output.split("\n"); - Assert.assertEquals(11, lines.length); + Assert.assertEquals(12, lines.length); Assert.assertEquals("Capacity information for live workers: ", lines[0]); - Assert.assertEquals(" Total Capacity: " + size, lines[1]); - Assert.assertEquals(" Tier: MEM Size: " + size, lines[2]); - Assert.assertEquals(" Used Capacity: 0B", lines[3]); - Assert.assertEquals(" Tier: MEM Size: 0B", lines[4]); - Assert.assertEquals(" Used Percentage: 0%", lines[5]); - Assert.assertEquals(" Free Percentage: 100%", lines[6]); - Assert.assertEquals("", lines[7]); - Assert.assertTrue(lines[8].matches( + Assert.assertTrue(lines[1].startsWith(" Total vCPUs:")); // value depends on environment + Assert.assertEquals(" Total Capacity: " + size, lines[2]); + Assert.assertEquals(" Tier: MEM Size: " + size, lines[3]); + Assert.assertEquals(" Used Capacity: 0B", lines[4]); + Assert.assertEquals(" Tier: MEM Size: 0B", lines[5]); + Assert.assertEquals(" Used Percentage: 0%", lines[6]); + Assert.assertEquals(" 
Free Percentage: 100%", lines[7]); + Assert.assertEquals("", lines[8]); + Assert.assertTrue(lines[9].matches( "Worker Name {6,}State {11,}Last Heartbeat {3}Storage {7}MEM {14}Version {10}Revision *")); - Assert.assertTrue(lines[9].contains("ACTIVE")); - Assert.assertTrue(lines[9].contains("capacity " + size)); - Assert.assertTrue(lines[10].contains("used 0B (0%)")); + Assert.assertTrue(lines[10].contains("ACTIVE")); + Assert.assertTrue(lines[10].contains("capacity " + size)); + Assert.assertTrue(lines[11].contains("used 0B (0%)")); } @Test From f1cec16d217b4b14cf05a9f0c31620b97eb9c84d Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Tue, 10 Oct 2023 17:56:46 +0800 Subject: [PATCH 323/334] Support Mkdir/CreateFile with configured default umask in HDFS API ### What changes are proposed in this pull request? Support creating directories and files with default permissions based on configuration propertykey in HDFS API. ### Why are the changes needed? Alluxio Hdfs api hasn't the corresponding implementation of Mkdir without permission parameter. If it does not carry permission, the umask property in the configuration item shall prevail. ### Does this PR introduce any user facing changes? 
pr-link: Alluxio/alluxio#18253 change-id: cid-6ddd2243bac00ebbdbdff1e731036c0d3d6228c8 --- .../alluxio/hadoop/AbstractFileSystem.java | 36 +++++++++++++++++++ .../hadoop/AbstractFileSystemTest.java | 6 +++- .../hadoop/FileSystemAclIntegrationTest.java | 10 ++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java index aa385b0a3aef..4589dd252673 100644 --- a/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java +++ b/core/client/hdfs/src/main/java/alluxio/hadoop/AbstractFileSystem.java @@ -36,6 +36,7 @@ import alluxio.master.MasterInquireClient.Factory; import alluxio.security.CurrentUser; import alluxio.security.authorization.Mode; +import alluxio.util.ModeUtils; import alluxio.wire.BlockLocationInfo; import alluxio.wire.FileBlockInfo; import alluxio.wire.WorkerNetAddress; @@ -150,6 +151,27 @@ public void close() throws IOException { mFileSystem.close(); } + /** + * Attempts to create a file with default permission. + * Overwrite will not succeed if the path exists and is a folder. 
+ * + * @param path path to create + * @param overwrite overwrite if file exists + * @param bufferSize the size in bytes of the buffer to be used + * @param replication under filesystem replication factor, this is ignored + * @param blockSize block size in bytes + * @param progress queryable progress + * @return an {@link FSDataOutputStream} created at the indicated path of a file + */ + @Override + public FSDataOutputStream create(Path path, boolean overwrite, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + String confUmask = mAlluxioConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK); + Mode mode = ModeUtils.applyFileUMask(Mode.defaults(), confUmask); + return this.create(path, new FsPermission(mode.toShort()), overwrite, bufferSize, replication, + blockSize, progress); + } + /** * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder. * @@ -601,6 +623,20 @@ public FileStatus[] listStatus(Path path) throws IOException { return ret; } + /** + * Attempts to create a folder with the specified path with default permission. + * Parent directories will be created. + * + * @param path path to create + * @return true if the indicated folder is created successfully or already exists + */ + @Override + public boolean mkdirs(Path path) throws IOException { + String confUmask = mAlluxioConf.getString(PropertyKey.SECURITY_AUTHORIZATION_PERMISSION_UMASK); + Mode mode = ModeUtils.applyDirectoryUMask(Mode.defaults(), confUmask); + return mkdirs(path, new FsPermission(mode.toShort())); + } + /** * Attempts to create a folder with the specified path. Parent directories will be created. 
* diff --git a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java index 221f5a7947d1..78ae1e49229a 100644 --- a/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java +++ b/core/client/hdfs/src/test/java/alluxio/hadoop/AbstractFileSystemTest.java @@ -52,6 +52,8 @@ import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsCreateModes; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.Before; @@ -726,7 +728,9 @@ public void createWithoutOverwrite() throws Exception { ExceptionMessage.CANNOT_OVERWRITE_FILE_WITHOUT_OVERWRITE.getMessage(path.toString()))); try (FileSystem alluxioHadoopFs = new FileSystem(alluxioFs)) { - alluxioHadoopFs.create(path, false, 100, (short) 1, 1000); + alluxioHadoopFs.create(path, + FsCreateModes.applyUMask(FsPermission.getFileDefault(), FsPermission.getUMask(getConf())), + false, 100, (short) 1, 1000, null); fail("create() of existing file is expected to fail"); } catch (IOException e) { assertEquals("alluxio.exception.FileAlreadyExistsException: " diff --git a/tests/src/test/java/alluxio/client/hadoop/FileSystemAclIntegrationTest.java b/tests/src/test/java/alluxio/client/hadoop/FileSystemAclIntegrationTest.java index 0372cef82f9d..4d4b9123fad0 100644 --- a/tests/src/test/java/alluxio/client/hadoop/FileSystemAclIntegrationTest.java +++ b/tests/src/test/java/alluxio/client/hadoop/FileSystemAclIntegrationTest.java @@ -108,6 +108,12 @@ public void cleanupTFS() throws Exception { @Test public void createFileWithPermission() throws Exception { + Path defaultFile = new Path("/createfile-default"); + FSDataOutputStream stream = sTFS.create(defaultFile, false /* ignored */, 10 /* ignored */, + (short) 1 /* ignored */, 512 /* ignored 
*/, null /* ignored */); + stream.close(); + FileStatus fileStatus = sTFS.getFileStatus(defaultFile); + Assert.assertEquals((short) 0644, fileStatus.getPermission().toShort()); List permissionValues = Lists.newArrayList(0111, 0222, 0333, 0444, 0555, 0666, 0777, 0755, 0733, 0644, 0533, 0511); for (int value : permissionValues) { @@ -124,6 +130,10 @@ public void createFileWithPermission() throws Exception { @Test public void mkdirsWithPermission() throws Exception { + Path defaultDir = new Path("/createDir-default"); + sTFS.mkdirs(defaultDir); + FileStatus fileStatus = sTFS.getFileStatus(defaultDir); + Assert.assertEquals((short) 0755, fileStatus.getPermission().toShort()); List permissionValues = Lists.newArrayList(0111, 0222, 0333, 0444, 0555, 0666, 0777, 0755, 0733, 0644, 0533, 0511); for (int value : permissionValues) { From 15a333a5e8596e42dd9346a6c9fe658a057d2276 Mon Sep 17 00:00:00 2001 From: yuyang wang <39869597+Jackson-Wang-7@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:23:55 +0800 Subject: [PATCH 324/334] Get User/Group/Permission of existed objects in OSS ufs ### What changes are proposed in this pull request? Using the user of the OSS bucket to represent the user of the object loaded from OSS. Add the mapping way from OSS username to the custom username. Add the default permission mode for existing files loaded from OSS. ### Why are the changes needed? the object loaded from OSS ufs will be null user, null group, and 777 by default. ### Does this PR introduce any user facing changes? 
pr-link: Alluxio/alluxio#18262 change-id: cid-7e56861c9def695876cde32d1e0aa453b512470d --- .../main/java/alluxio/conf/PropertyKey.java | 19 ++++++++ .../underfs/oss/OSSUnderFileSystem.java | 44 ++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index e52e2c41affd..5c147ddd377c 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -1809,6 +1809,21 @@ public String toString() { .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) .setScope(Scope.SERVER) .build(); + public static final PropertyKey UNDERFS_OSS_DEFAULT_MODE = + stringBuilder(Name.UNDERFS_OSS_DEFAULT_MODE) + .setAlias("alluxio.underfs.oss.default.mode") + .setDefaultValue("0700") + .setDescription("Mode (in octal notation) for OSS objects if mode cannot be discovered.") + .setConsistencyCheckLevel(ConsistencyCheckLevel.WARN) + .setScope(Scope.SERVER) + .build(); + public static final PropertyKey UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING = + stringBuilder(Name.UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING) + .setDescription("Optionally, specify a preset oss canonical id to Alluxio username " + + "static mapping, in the format \"id1=user1;id2=user2\". 
") + .setConsistencyCheckLevel(ConsistencyCheckLevel.ENFORCE) + .setScope(Scope.SERVER) + .build(); public static final PropertyKey S3A_ACCESS_KEY = stringBuilder(Name.S3A_ACCESS_KEY) .setAlias(Name.AWS_ACCESS_KEY) .setDescription("The access key of S3 bucket.") @@ -7879,6 +7894,10 @@ public static final class Name { "alluxio.underfs.oss.streaming.upload.partition.size"; public static final String UNDERFS_OSS_STREAMING_UPLOAD_THREADS = "alluxio.underfs.oss.streaming.upload.threads"; + public static final String UNDERFS_OSS_DEFAULT_MODE = + "alluxio.underfs.oss.default.mode"; + public static final String UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING = + "alluxio.underfs.oss.owner.id.to.username.mapping"; public static final String UNDERFS_S3_BULK_DELETE_ENABLED = "alluxio.underfs.s3.bulk.delete.enabled"; public static final String UNDERFS_S3_DEFAULT_MODE = "alluxio.underfs.s3.default.mode"; diff --git a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java index c511620a33ad..1686641fa731 100644 --- a/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java +++ b/underfs/oss/src/main/java/alluxio/underfs/oss/OSSUnderFileSystem.java @@ -20,6 +20,8 @@ import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.underfs.options.OpenOptions; +import alluxio.util.CommonUtils; +import alluxio.util.ModeUtils; import alluxio.util.UnderFileSystemUtils; import alluxio.util.executor.ExecutorServiceFactories; import alluxio.util.io.PathUtils; @@ -29,6 +31,7 @@ import com.aliyun.oss.OSSClientBuilder; import com.aliyun.oss.ServiceException; import com.aliyun.oss.model.AbortMultipartUploadRequest; +import com.aliyun.oss.model.BucketInfo; import com.aliyun.oss.model.DeleteObjectsRequest; import com.aliyun.oss.model.DeleteObjectsResult; import com.aliyun.oss.model.ListMultipartUploadsRequest; @@ -38,6 +41,7 @@ import 
com.aliyun.oss.model.OSSObjectSummary; import com.aliyun.oss.model.ObjectListing; import com.aliyun.oss.model.ObjectMetadata; +import com.aliyun.oss.model.Owner; import com.google.common.base.Preconditions; import com.google.common.base.Suppliers; import com.google.common.util.concurrent.ListeningExecutorService; @@ -66,6 +70,9 @@ public class OSSUnderFileSystem extends ObjectUnderFileSystem { /** Suffix for an empty file to flag it as a directory. */ private static final String FOLDER_SUFFIX = "_$folder$"; + /** Default owner of objects if owner cannot be determined. */ + private static final String DEFAULT_OWNER = ""; + /** Aliyun OSS client. */ private final OSS mClient; @@ -76,6 +83,10 @@ public class OSSUnderFileSystem extends ObjectUnderFileSystem { private StsOssClientProvider mClientProvider; + /** The permissions associated with the bucket. Fetched once and assumed to be immutable. */ + private final Supplier mPermissions + = CommonUtils.memoize(this::getPermissionsInternal); + /** * Constructs a new instance of {@link OSSUnderFileSystem}. * @@ -332,7 +343,38 @@ protected ObjectStatus getObjectStatus(String key) { // No ACL integration currently, returns default empty value @Override protected ObjectPermissions getPermissions() { - return new ObjectPermissions("", "", Constants.DEFAULT_FILE_SYSTEM_MODE); + return mPermissions.get(); + } + + /** + * Since there is no group in OSS, the owner is reused as the group. This method calls the + * OSS API and requires additional permissions aside from just read only. This method is best + * effort and will continue with default permissions (no owner, no group, 0700). 
+ * + * @return the permissions associated with this under storage system + */ + private ObjectPermissions getPermissionsInternal() { + short bucketMode = + ModeUtils.getUMask(mUfsConf.getString(PropertyKey.UNDERFS_OSS_DEFAULT_MODE)).toShort(); + String accountOwner = DEFAULT_OWNER; + + try { + BucketInfo bucketInfo = mClient.getBucketInfo(mBucketName); + Owner owner = bucketInfo.getBucket().getOwner(); + if (mUfsConf.isSet(PropertyKey.UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING)) { + // Here accountOwner can be null if there is no mapping set for this owner id + accountOwner = CommonUtils.getValueFromStaticMapping( + mUfsConf.getString(PropertyKey.UNDERFS_OSS_OWNER_ID_TO_USERNAME_MAPPING), + owner.getId()); + } + if (accountOwner == null || accountOwner.equals(DEFAULT_OWNER)) { + // If there is no user-defined mapping, use display name or id. + accountOwner = owner.getDisplayName() != null ? owner.getDisplayName() : owner.getId(); + } + } catch (ServiceException e) { + LOG.warn("Failed to get bucket owner, proceeding with defaults. {}", e.toString()); + } + return new ObjectPermissions(accountOwner, accountOwner, bucketMode); } @Override From 87ddb3c1adff516f71de92627a16bd4005b209bf Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Fri, 13 Oct 2023 14:30:01 +0800 Subject: [PATCH 325/334] Check cluster ufs version in getClusterConf ### What changes are proposed in this pull request? Check cluster ufs version in getClusterConf. ### Why are the changes needed? Fix #18221. 
pr-link: Alluxio/alluxio#18222 change-id: cid-2c67758e30cac31a84f41d47b50c0ed958c88ffb --- core/common/src/main/java/alluxio/conf/Configuration.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/common/src/main/java/alluxio/conf/Configuration.java b/core/common/src/main/java/alluxio/conf/Configuration.java index 1561025eeb4d..76a7ff98bac0 100644 --- a/core/common/src/main/java/alluxio/conf/Configuration.java +++ b/core/common/src/main/java/alluxio/conf/Configuration.java @@ -481,6 +481,7 @@ public static GetConfigurationPResponse loadConfiguration(InetSocketAddress addr public static InstancedConfiguration getClusterConf(GetConfigurationPResponse response, AlluxioConfiguration conf, Scope scope) { String clientVersion = conf.getString(PropertyKey.VERSION); + String clientUfsVersion = conf.getString(PropertyKey.UNDERFS_VERSION); LOG.debug("Alluxio {} (version {}) is trying to load cluster level configurations", scope, clientVersion); List clusterConfig = response.getClusterConfigsList(); @@ -493,6 +494,11 @@ public static InstancedConfiguration getClusterConf(GetConfigurationPResponse re scope, clientVersion, clusterVersion); clusterProps.remove(PropertyKey.VERSION); } + // Check ufs version. Avoid adding it to user properties if the two versions are the same. + String clusterUfsVersion = clusterProps.get(PropertyKey.UNDERFS_VERSION).toString(); + if (clientUfsVersion.equals(clusterUfsVersion)) { + clusterProps.remove(PropertyKey.UNDERFS_VERSION); + } // Merge conf returned by master as the cluster default into conf object AlluxioProperties props = conf.copyProperties(); props.merge(clusterProps, Source.CLUSTER_DEFAULT); From 057804ebe07cb6c6de4f934ca2520b1b5b08fd09 Mon Sep 17 00:00:00 2001 From: gp1314 <814085234@qq.com> Date: Tue, 17 Oct 2023 09:42:42 +0800 Subject: [PATCH 326/334] Change CosNUnderFileSystemFactory implementation UnderFileSystemFactory ### What changes are proposed in this pull request? 
- Don't have to inherit the HdfsUnderFileSystemFactory CosNUnderFileSystemFactory directly implement UnderFileSystemFactory interface ### Why are the changes needed? - Possibly to resolve package conflicts, #17024 removed HdfsUnderFileSystemFactory from COSN UFS jar, resulting in inability to use COSN interface - However, CosNUnderFileSystemFactory inherits from HdfsUnderFileSystemFactory,the ServiceLoader.load method loads CosNUnderFileSystemFactory and first searches for its parent class, so removing HdfsUnderFileSystemFactory directly will result in an error. ``` failed to load jar alluxio-underfs-hadoop-cosn-3.1.0-5.8.5-2.9.3.jar NoClassdDefFoundError :alluxio/underfs/hdfs/HdfsUnderFileSystemFactory ``` pr-link: Alluxio/alluxio#18143 change-id: cid-4a80f6cdeae5b9bdb9e956c36838403ee6ce7c46 --- .../java/alluxio/underfs/cosn/CosNUnderFileSystemFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/underfs/cosn/src/main/java/alluxio/underfs/cosn/CosNUnderFileSystemFactory.java b/underfs/cosn/src/main/java/alluxio/underfs/cosn/CosNUnderFileSystemFactory.java index 18856f0b5f4a..4dc6f1d0665b 100644 --- a/underfs/cosn/src/main/java/alluxio/underfs/cosn/CosNUnderFileSystemFactory.java +++ b/underfs/cosn/src/main/java/alluxio/underfs/cosn/CosNUnderFileSystemFactory.java @@ -17,7 +17,7 @@ import alluxio.conf.PropertyKey; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemConfiguration; -import alluxio.underfs.hdfs.HdfsUnderFileSystemFactory; +import alluxio.underfs.UnderFileSystemFactory; import com.google.common.base.Preconditions; @@ -27,7 +27,7 @@ * Factory for creating {@link CosnUnderFileSystem}. 
*/ @ThreadSafe -public class CosNUnderFileSystemFactory extends HdfsUnderFileSystemFactory { +public class CosNUnderFileSystemFactory implements UnderFileSystemFactory { @Override public UnderFileSystem create(String path, UnderFileSystemConfiguration conf) { From 5fee8fbdbec3796a4776fb8c38ca5be8729c2747 Mon Sep 17 00:00:00 2001 From: maobaolong <307499405@qq.com> Date: Fri, 27 Oct 2023 17:56:24 +0800 Subject: [PATCH 327/334] Fix cosn ufs cannot find class Fix bug involved by https://github.com/Alluxio/alluxio/pull/17024 After https://github.com/Alluxio/alluxio/pull/18143, `HdfsUnderFileSystemFactory` is no longer included in COSN jar therefore no need to exclude. pr-link: Alluxio/alluxio#18303 change-id: cid-6931be7291f52728022e4555d1c7183857948b9f --- underfs/cosn/pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/underfs/cosn/pom.xml b/underfs/cosn/pom.xml index fb9927f24074..bf869c15785d 100644 --- a/underfs/cosn/pom.xml +++ b/underfs/cosn/pom.xml @@ -86,8 +86,6 @@ META-INF/*.SF META-INF/*.DSA META-INF/*.RSA - - alluxio/underfs/hdfs/HdfsUnderFileSystemFactory.* From b37a96d45c9c56f885927f7fa647aa18bda7123d Mon Sep 17 00:00:00 2001 From: juanjuan2 <45665083+juanjuan2@users.noreply.github.com> Date: Fri, 27 Oct 2023 20:43:31 +0800 Subject: [PATCH 328/334] Fix cacheMissPercentage metric ### What changes are proposed in this pull request? Please outline the changes and how this PR fixes the issue. modify the calculation method for cacheMissPercentage metric, ensuring it comprehensively accounts for the influence of job worker read operations. #16945 ### Why are the changes needed? Because of the incorrect calculation results of this metric, it may produce exception value. ### Does this PR introduce any user facing changes? 
No pr-link: Alluxio/alluxio#18208 change-id: cid-d24a6f324f7148cab4a30fbbf819510a1a314ebc --- .../main/java/alluxio/metrics/MetricKey.java | 15 +++ .../meta/AlluxioMasterRestServiceHandler.java | 5 +- .../alluxio/master/metrics/MetricsStore.java | 3 + .../worker/block/BlockMasterSyncHelper.java | 1 - .../worker/block/TieredBlockStore.java | 7 +- .../block/UnderFileSystemBlockStore.java | 16 ++- .../block/io/MetricAccountingBlockReader.java | 103 ++++++++++++++++++ .../alluxio/worker/page/PagedBlockReader.java | 2 +- .../alluxio/worker/page/PagedBlockStore.java | 2 +- .../worker/page/PagedUfsBlockReader.java | 31 ++++++ .../worker/page/UfsBlockReadOptions.java | 14 ++- .../worker/page/PagedBlockReaderTest.java | 2 +- 12 files changed, 188 insertions(+), 13 deletions(-) create mode 100644 core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java diff --git a/core/common/src/main/java/alluxio/metrics/MetricKey.java b/core/common/src/main/java/alluxio/metrics/MetricKey.java index 100c914ca94e..8e40eefef70a 100644 --- a/core/common/src/main/java/alluxio/metrics/MetricKey.java +++ b/core/common/src/main/java/alluxio/metrics/MetricKey.java @@ -1711,6 +1711,13 @@ public static String getSyncMetricName(long mountId) { .setDescription("Bytes read per minute throughput from all Alluxio UFSes by all workers") .setMetricType(MetricType.GAUGE) .build(); + + public static final MetricKey CLUSTER_BYTES_READ_CACHE = + new Builder("Cluster.BytesReadCache") + .setDescription("Total number of bytes read from all worker's cache") + .setMetricType(MetricType.COUNTER) + .build(); + public static final MetricKey CLUSTER_BYTES_WRITTEN_REMOTE = new Builder("Cluster.BytesWrittenRemote") .setDescription("Total number of bytes written to workers via network (RPC). 
" @@ -2003,6 +2010,14 @@ public static String getSyncMetricName(long mountId) { .setMetricType(MetricType.METER) .setIsClusterAggregated(false) .build(); + + public static final MetricKey WORKER_BYTES_READ_CACHE = + new Builder("Worker.BytesReadCache") + .setDescription("Total number of bytes read from the worker's cache") + .setMetricType(MetricType.COUNTER) + .setIsClusterAggregated(true) + .build(); + public static final MetricKey WORKER_BYTES_WRITTEN_DIRECT = new Builder("Worker.BytesWrittenDirect") .setDescription("Total number of bytes written to this worker " diff --git a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java index f09279100267..d7b5ad2bd7cc 100644 --- a/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java +++ b/core/server/master/src/main/java/alluxio/master/meta/AlluxioMasterRestServiceHandler.java @@ -989,8 +989,11 @@ public Response getWebUIMetrics() { .setTotalBytesReadRemote(FormatUtils.getSizeFromBytes(bytesReadRemote)) .setTotalBytesReadUfs(FormatUtils.getSizeFromBytes(bytesReadUfs)); + Long bytesReadCache = counters.get( + MetricKey.CLUSTER_BYTES_READ_CACHE.getName()).getCount(); + // cluster cache hit and miss - long bytesReadTotal = bytesReadLocal + bytesReadRemote + bytesReadDomainSocket; + long bytesReadTotal = bytesReadLocal + bytesReadCache + bytesReadUfs; double cacheHitLocalPercentage = (bytesReadTotal > 0) ? 
(100D * (bytesReadLocal + bytesReadDomainSocket) / bytesReadTotal) : 0; diff --git a/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java b/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java index cca38f792671..9ded436a8af0 100644 --- a/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java +++ b/core/server/master/src/main/java/alluxio/master/metrics/MetricsStore.java @@ -185,6 +185,9 @@ public void initMetricKeys() { mClusterCounters.putIfAbsent(new ClusterCounterKey(InstanceType.WORKER, MetricKey.WORKER_BYTES_READ_DOMAIN.getMetricName()), MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_DOMAIN.getName())); + mClusterCounters.putIfAbsent(new ClusterCounterKey(InstanceType.WORKER, + MetricKey.WORKER_BYTES_READ_CACHE.getMetricName()), + MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_CACHE.getName())); mClusterCounters.putIfAbsent(new ClusterCounterKey(InstanceType.WORKER, MetricKey.WORKER_BYTES_WRITTEN_REMOTE.getMetricName()), MetricsSystem.counter(MetricKey.CLUSTER_BYTES_WRITTEN_REMOTE.getName())); diff --git a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java index b43cc378ace1..20dc2ded5550 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/BlockMasterSyncHelper.java @@ -135,7 +135,6 @@ boolean heartbeat( // Send the heartbeat and execute the response Command cmdFromMaster = null; List metrics = MetricsSystem.reportWorkerMetrics(); - try { cmdFromMaster = mMasterClient.heartbeat(workerId, storeMeta.getCapacityBytesOnTiers(), storeMeta.getUsedBytesOnTiers(), blockReport.getRemovedBlocks(), diff --git a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java index 
855203f9859d..ce447d94abce 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/TieredBlockStore.java @@ -30,6 +30,8 @@ import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.io.DelegatingBlockReader; +import alluxio.worker.block.io.LocalFileBlockReader; +import alluxio.worker.block.io.MetricAccountingBlockReader; import alluxio.worker.block.io.StoreBlockReader; import alluxio.worker.block.io.StoreBlockWriter; import alluxio.worker.block.management.DefaultStoreLoadTracker; @@ -221,10 +223,11 @@ public BlockReader createBlockReader(long sessionId, long blockId, long offset) } try { - BlockReader reader = new StoreBlockReader(sessionId, block); + LocalFileBlockReader reader = new StoreBlockReader(sessionId, block); ((FileChannel) reader.getChannel()).position(offset); accessBlock(sessionId, blockId); - return new DelegatingBlockReader(reader, blockLock); + BlockReader mareader = new MetricAccountingBlockReader(reader); + return new DelegatingBlockReader(mareader, blockLock); } catch (Exception e) { blockLock.close(); throw new IOException(format("Failed to get local block reader, sessionId=%d, " diff --git a/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java index 07df037ddc23..b52f428c78f3 100644 --- a/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/block/UnderFileSystemBlockStore.java @@ -387,11 +387,19 @@ public String toString() { } } - private static class BytesReadMetricKey { - private final AlluxioURI mUri; - private final String mUser; + /** + * create an BytesReadMetricKey. 
+ */ + public static class BytesReadMetricKey { + public final AlluxioURI mUri; + public final String mUser; - BytesReadMetricKey(AlluxioURI uri, String user) { + /** + * create an instance of the key class. + * @param uri + * @param user + */ + public BytesReadMetricKey(AlluxioURI uri, String user) { mUri = uri; mUser = user; } diff --git a/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java new file mode 100644 index 000000000000..1e58c31aef1f --- /dev/null +++ b/core/server/worker/src/main/java/alluxio/worker/block/io/MetricAccountingBlockReader.java @@ -0,0 +1,103 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.worker.block.io; + +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; + +import io.netty.buffer.ByteBuf; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; + +/** + * An reader class with metrics. + */ +public class MetricAccountingBlockReader extends BlockReader { + private final LocalFileBlockReader mBlockReader; + + /** + * A decorator of BlockReader. 
+ * @param mblockReader block reader + */ + public MetricAccountingBlockReader(LocalFileBlockReader mblockReader) { + mBlockReader = mblockReader; + } + + @Override + public ByteBuffer read(long offset, long length) throws IOException { + ByteBuffer buffer = mBlockReader.read(offset, length); + int bytesReadFromCache = buffer.limit() - buffer.position(); + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); + return buffer; + } + + @Override + public long getLength() { + return mBlockReader.getLength(); + } + + @Override + public ReadableByteChannel getChannel() { + return new ReadableByteChannel() { + private final ReadableByteChannel mDelegate = mBlockReader.getChannel(); + @Override + public int read(ByteBuffer dst) throws IOException { + int bytesRead = mDelegate.read(dst); + if (bytesRead != -1) { + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesRead); + } + return bytesRead; + } + + @Override + public boolean isOpen() { + return mDelegate.isOpen(); + } + + @Override + public void close() throws IOException { + mDelegate.close(); + } + }; + } + + @Override + public int transferTo(ByteBuf buf) throws IOException { + int bytesReadFromCache = mBlockReader.transferTo(buf); + if (bytesReadFromCache != -1) { + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); + } + return bytesReadFromCache; + } + + @Override + public boolean isClosed() { + return mBlockReader.isClosed(); + } + + @Override + public String getLocation() { + return mBlockReader.getLocation(); + } + + @Override + public String toString() { + return mBlockReader.toString(); + } + + @Override + public void close() throws IOException { + mBlockReader.close(); + } +} diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java index a0a09d2b8b41..41b7a95d4016 100644 --- 
a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockReader.java @@ -39,7 +39,6 @@ */ @NotThreadSafe public class PagedBlockReader extends BlockReader { - private static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocate(0); private final long mPageSize; private final CacheManager mCacheManager; @@ -122,6 +121,7 @@ private long read(ByteBuf byteBuf, long offset, long length) throws IOException bytesRead += bytesReadFromCache; MetricsSystem.meter(MetricKey.CLIENT_CACHE_BYTES_READ_CACHE.getName()).mark(bytesRead); mReadFromLocalCache = true; + MetricsSystem.counter(MetricKey.WORKER_BYTES_READ_CACHE.getName()).inc(bytesReadFromCache); } else { if (!mUfsBlockReader.isPresent()) { throw new AlluxioRuntimeException( diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index 2dba6f52ce61..59ac5af9daeb 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -300,7 +300,7 @@ private BlockReader getBlockReader(PagedBlockMeta blockMeta, long offset, } final Optional ufsBlockReader = readOptions.map(opt -> new PagedUfsBlockReader( - mUfsManager, mUfsInStreamCache, blockMeta, offset, opt, mPageSize)); + mUfsManager, mUfsInStreamCache, blockMeta, offset, opt, mPageSize)); return new PagedBlockReader(mCacheManager, blockMeta, offset, ufsBlockReader, mPageSize); } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java index ef79f18348da..dd4d132a4bef 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedUfsBlockReader.java @@ -12,6 +12,12 @@ package 
alluxio.worker.page; import alluxio.conf.PropertyKey; +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.exception.status.NotFoundException; +import alluxio.exception.status.UnavailableException; +import alluxio.metrics.MetricInfo; +import alluxio.metrics.MetricKey; +import alluxio.metrics.MetricsSystem; import alluxio.network.protocol.databuffer.NioDirectBufferPool; import alluxio.resource.CloseableResource; import alluxio.underfs.UfsManager; @@ -19,9 +25,11 @@ import alluxio.underfs.options.OpenOptions; import alluxio.util.IdUtils; import alluxio.worker.block.UfsInputStreamCache; +import alluxio.worker.block.UnderFileSystemBlockStore.BytesReadMetricKey; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.meta.BlockMeta; +import com.codahale.metrics.Counter; import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; @@ -31,6 +39,8 @@ import java.nio.channels.Channels; import java.nio.channels.ClosedChannelException; import java.nio.channels.ReadableByteChannel; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /** * Block reader that reads from UFS. @@ -47,6 +57,9 @@ public class PagedUfsBlockReader extends BlockReader { private long mLastPageIndex = -1; private boolean mClosed = false; private long mPosition; + private final ConcurrentMap mUfsBytesReadMetrics = + new ConcurrentHashMap<>(); + private final Counter mUfsBytesRead; /** * @param ufsManager @@ -70,6 +83,23 @@ public PagedUfsBlockReader(UfsManager ufsManager, mInitialOffset = offset; mLastPage = ByteBuffer.allocateDirect((int) mPageSize); mPosition = offset; + try { + UfsManager.UfsClient ufsClient = mUfsManager.get(mUfsBlockOptions.getMountId()); + mUfsBytesRead = mUfsBytesReadMetrics.computeIfAbsent( + new BytesReadMetricKey(ufsClient.getUfsMountPointUri(), mUfsBlockOptions.getUser()), + key -> key.mUser == null + ? 
MetricsSystem.counterWithTags( + MetricKey.WORKER_BYTES_READ_UFS.getName(), + MetricKey.WORKER_BYTES_READ_UFS.isClusterAggregated(), + MetricInfo.TAG_UFS, MetricsSystem.escape(key.mUri)) + : MetricsSystem.counterWithTags( + MetricKey.WORKER_BYTES_READ_UFS.getName(), + MetricKey.WORKER_BYTES_READ_UFS.isClusterAggregated(), + MetricInfo.TAG_UFS, MetricsSystem.escape(key.mUri), + MetricInfo.TAG_USER, key.mUser)); + } catch (UnavailableException | NotFoundException e) { + throw AlluxioRuntimeException.from(e); + } } @Override @@ -145,6 +175,7 @@ public int readPageAtIndex(ByteBuffer buffer, long pageIndex) throws IOException mLastPage.flip(); mLastPageIndex = pageIndex; fillWithCachedPage(buffer, pageIndex * mPageSize, totalBytesRead); + mUfsBytesRead.inc(totalBytesRead); return totalBytesRead; } diff --git a/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java b/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java index 4156c63334cb..d6b43a4c197e 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/UfsBlockReadOptions.java @@ -16,6 +16,7 @@ import com.google.common.base.Preconditions; import java.util.Objects; +import javax.annotation.Nullable; /** * Options for reading a block from UFS. 
@@ -26,12 +27,15 @@ public final class UfsBlockReadOptions { private final long mOffsetInFile; private final String mUfsPath; private final boolean mCacheIntoAlluxio; + @Nullable private final String mUser; - UfsBlockReadOptions(long mountId, long offsetInFile, String ufsPath, boolean cacheIntoAlluxio) { + UfsBlockReadOptions(long mountId, long offsetInFile, String ufsPath, boolean cacheIntoAlluxio, + @Nullable String user) { mMountId = mountId; mOffsetInFile = offsetInFile; mUfsPath = ufsPath; mCacheIntoAlluxio = cacheIntoAlluxio; + mUser = user; } /** @@ -47,7 +51,7 @@ public static UfsBlockReadOptions fromProto(Protocol.OpenUfsBlockOptions options "missing offset in file for UFS block read"); Preconditions.checkArgument(options.hasUfsPath(), "missing UFS path for UFS block read"); return new UfsBlockReadOptions(options.getMountId(), - options.getOffsetInFile(), options.getUfsPath(), !options.getNoCache()); + options.getOffsetInFile(), options.getUfsPath(), !options.getNoCache(), options.getUser()); } /** @@ -71,6 +75,12 @@ public String getUfsPath() { return mUfsPath; } + /** + * + * @return user + */ + public String getUser() { return mUser; } + /** * @return whether the UFS block should be cached into Alluxio */ diff --git a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java index 19d1fd222652..d8a938078b71 100644 --- a/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java +++ b/core/server/worker/src/test/java/alluxio/worker/page/PagedBlockReaderTest.java @@ -239,7 +239,7 @@ public void sequentialTransferMultipleTimes() throws Exception { } private static UfsBlockReadOptions createUfsBlockOptions(String ufsPath) { - return new UfsBlockReadOptions(MOUNT_ID, OFFSET_IN_FILE, ufsPath, true); + return new UfsBlockReadOptions(MOUNT_ID, OFFSET_IN_FILE, ufsPath, true, null); } private static void createTempUfsBlock(Path destPath, long 
blockSize) throws Exception { From f00399f4d423d9c8f4d4cf0f9c63e8b1cf360a7f Mon Sep 17 00:00:00 2001 From: fsl <1171313930@qq.com> Date: Mon, 30 Oct 2023 12:33:25 +0800 Subject: [PATCH 329/334] Bump hadoop version to 3.3.4 ### What changes are proposed in this pull request? Bump Hadoop version from `3.3.1` to `3.3.4`. ### Why are the changes needed? Fix hadoop CVE-2021-37404. ### Does this PR introduce any user facing changes? Hadoop version bump. pr-link: Alluxio/alluxio#17002 change-id: cid-fd12eec84b42efd3112c3c71039702dca92ca775 --- core/common/src/main/java/alluxio/conf/PropertyKey.java | 2 +- .../src/alluxio.org/build-distribution/cmd/common.go | 4 ++-- .../build-distribution/cmd/generate-tarball.go | 2 +- docs/cn/contributor/Building-Alluxio-From-Source.md | 2 +- docs/cn/ufs/HDFS.md | 6 +++--- docs/en/contributor/Building-Alluxio-From-Source.md | 4 ++-- docs/en/ufs/COSN.md | 2 +- docs/en/ufs/HDFS.md | 8 ++++---- integration/tools/pom.xml | 2 +- pom.xml | 6 +++--- shaded/hadoop/pom.xml | 2 +- underfs/abfs/pom.xml | 2 +- underfs/adl/pom.xml | 2 +- underfs/hdfs/pom.xml | 2 +- underfs/wasb/pom.xml | 2 +- 15 files changed, 24 insertions(+), 24 deletions(-) diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 5c147ddd377c..8f7db1b06fa3 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -7297,7 +7297,7 @@ public String toString() { // TODO(ns) Fix default value to handle other UFS types public static final PropertyKey UNDERFS_VERSION = stringBuilder(Name.UNDERFS_VERSION) - .setDefaultValue("3.3.1") + .setDefaultValue("3.3.4") .setIsHidden(true) .build(); diff --git a/dev/scripts/src/alluxio.org/build-distribution/cmd/common.go b/dev/scripts/src/alluxio.org/build-distribution/cmd/common.go index 9d09774f100b..184ceb84bc19 100644 --- a/dev/scripts/src/alluxio.org/build-distribution/cmd/common.go +++ 
b/dev/scripts/src/alluxio.org/build-distribution/cmd/common.go @@ -37,7 +37,7 @@ var hadoopDistributions = map[string]version{ "hadoop-3.0": parseVersion("3.0.3"), "hadoop-3.1": parseVersion("3.1.1"), "hadoop-3.2": parseVersion("3.2.1"), - "hadoop-3.3": parseVersion("3.3.1"), + "hadoop-3.3": parseVersion("3.3.4"), // This distribution type is built with 2.7.3, but doesn't include the hadoop version in the name. "default": parseVersion("2.7.3"), } @@ -69,7 +69,7 @@ var ufsModules = map[string]module{ "ufs-hadoop-3.0": {"hadoop-3.0", "hdfs", false, "-pl underfs/hdfs -Pufs-hadoop-3 -Dufs.hadoop.version=3.0.0 -PhdfsActiveSync"}, "ufs-hadoop-3.1": {"hadoop-3.1", "hdfs", false, "-pl underfs/hdfs -Pufs-hadoop-3 -Dufs.hadoop.version=3.1.1 -PhdfsActiveSync"}, "ufs-hadoop-3.2": {"hadoop-3.2", "hdfs", true, "-pl underfs/hdfs -Pufs-hadoop-3 -Dufs.hadoop.version=3.2.1 -PhdfsActiveSync"}, - "ufs-hadoop-3.3": {"hadoop-3.3", "hdfs", false, "-pl underfs/hdfs -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.1 -PhdfsActiveSync"}, + "ufs-hadoop-3.3": {"hadoop-3.3", "hdfs", false, "-pl underfs/hdfs -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.4 -PhdfsActiveSync"}, "ufs-hadoop-ozone-1.2.1": {"hadoop-ozone-1.2.1", "ozone", true, "-pl underfs/ozone -Pufs-hadoop-3 -Dufs.ozone.version=1.2.1"}, "ufs-hadoop-cosn-3.1.0-5.8.5": {"hadoop-cosn-3.1.0-5.8.5", "cosn", true, "-pl underfs/cosn -Dufs.cosn.version=3.1.0-5.8.5"}, diff --git a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go index c980c2bbb8db..6af5aeaddc87 100644 --- a/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go +++ b/dev/scripts/src/alluxio.org/build-distribution/cmd/generate-tarball.go @@ -165,7 +165,7 @@ func buildModules(srcPath, name, moduleFlag, version string, modules map[string] run(fmt.Sprintf("compiling %v module %v", name, moduleName), "mvn", moduleMvnArgs...) 
var srcJar string if moduleEntry.ufsType == "hdfs" { - var versionMvnArg = "3.3.1" + var versionMvnArg = "3.3.4" for _, arg := range moduleMvnArgs { if strings.Contains(arg, "ufs.hadoop.version") { versionMvnArg = strings.Split(arg, "=")[1] diff --git a/docs/cn/contributor/Building-Alluxio-From-Source.md b/docs/cn/contributor/Building-Alluxio-From-Source.md index 7b867ec4a9e6..151502c0b6ff 100644 --- a/docs/cn/contributor/Building-Alluxio-From-Source.md +++ b/docs/cn/contributor/Building-Alluxio-From-Source.md @@ -124,7 +124,7 @@ Hadoop versions >= 3.0.0 与新版本的Alluxio有最好的兼容性。 $ mvn clean install -pl underfs/hdfs/ \ -Dmaven.javadoc.skip=true -DskipTests -Dlicense.skip=true \ -Dcheckstyle.skip=true -Dfindbugs.skip=true \ - -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.1 + -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.4 ``` 要启用`active sync`,请确保使用 `hdfsActiveSync` 属性来构建, 请参考 [Active Sync for HDFS]({{ '/cn/core-services/Unified-Namespace.html' | relativize_url }}#hdfs元数据主动同步) 获得更多关于使用Active Sync的信息。 diff --git a/docs/cn/ufs/HDFS.md b/docs/cn/ufs/HDFS.md index 285512289475..fd7e75118793 100644 --- a/docs/cn/ufs/HDFS.md +++ b/docs/cn/ufs/HDFS.md @@ -15,7 +15,7 @@ priority: 3 要在一组机器上运行一个Alluxio集群,需要在每台机器上部署Alluxio二进制服务端包。你可以[下载带有正确Hadoop版本的预编译二进制包](Running-Alluxio-Locally.html),对于高级用户,也可[源码编译Alluxio](Building-Alluxio-From-Source.html), -注意,在编译源码包的时候,默认的Alluxio二进制包适用于HDFS `3.3.1`,若使用其他版本的Hadoop,需要指定正确的Hadoop版本,并且在Alluxio源码目录下运行如下命令: +注意,在编译源码包的时候,默认的Alluxio二进制包适用于HDFS `3.3.4`,若使用其他版本的Hadoop,需要指定正确的Hadoop版本,并且在Alluxio源码目录下运行如下命令: ```console $ mvn install -P -D -DskipTests @@ -74,9 +74,9 @@ alluxio.master.mount.table.root.ufs=hdfs://nameservice/ Alluxio支持类POSIX文件系统[用户和权限检查]({{ '/cn/security/Security.html' | relativize_url }}),这从v1.3开始默认启用。 为了确保文件/目录的权限信息,即HDFS上的用户,组和访问模式,与Alluxio一致,(例如,在Alluxio中被用户Foo创建的文件在HDFS中也以Foo作为用户持久化),用户**需要**以以下方式启动: -1. 
[HDFS超级用户](http://hadoop.apache.org/docs/r3.3.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#The_Super-User)。即,使用启动HDFS namenode进程的同一用户也启动Alluxio master和worker进程。也就是说,使用与启动HDFS的namenode进程相同的用户名启动Alluxio master和worker进程。 +1. [HDFS超级用户](http://hadoop.apache.org/docs/r3.3.4/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#The_Super-User)。即,使用启动HDFS namenode进程的同一用户也启动Alluxio master和worker进程。也就是说,使用与启动HDFS的namenode进程相同的用户名启动Alluxio master和worker进程。 -2. [HDFS超级用户组](http://hadoop.apache.org/docs/r3.3.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#Configuration_Parameters)的成员。编辑HDFS配置文件`hdfs-site.xml`并检查配置属性`dfs.permissions.superusergroup`的值。如果使用组(例如,"hdfs")设置此属性,则将用户添加到此组("hdfs")以启动Alluxio进程(例如,"alluxio");如果未设置此属性,请将一个组添加到此属性,其中Alluxio运行用户是此新添加组的成员。 +2. [HDFS超级用户组](http://hadoop.apache.org/docs/r3.3.4/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#Configuration_Parameters)的成员。编辑HDFS配置文件`hdfs-site.xml`并检查配置属性`dfs.permissions.superusergroup`的值。如果使用组(例如,"hdfs")设置此属性,则将用户添加到此组("hdfs")以启动Alluxio进程(例如,"alluxio");如果未设置此属性,请将一个组添加到此属性,其中Alluxio运行用户是此新添加组的成员。 注意,上面设置的用户只是启动Alluxio master和worker进程的标识。一旦Alluxio服务器启动,就**不必**使用此用户运行Alluxio客户端应用程序。 diff --git a/docs/en/contributor/Building-Alluxio-From-Source.md b/docs/en/contributor/Building-Alluxio-From-Source.md index dcf5bc8c012d..cb41b0d8996d 100644 --- a/docs/en/contributor/Building-Alluxio-From-Source.md +++ b/docs/en/contributor/Building-Alluxio-From-Source.md @@ -145,7 +145,7 @@ For example, $ mvn clean install -pl underfs/hdfs/ \ -Dmaven.javadoc.skip=true -DskipTests -Dlicense.skip=true \ -Dcheckstyle.skip=true -Dfindbugs.skip=true \ - -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.1 + -Pufs-hadoop-3 -Dufs.hadoop.version=3.3.4 ``` To enable active sync be sure to build using the `hdfsActiveSync` property. 
@@ -173,7 +173,7 @@ All main builds are from Apache so all Apache releases can be used directly -Pufs-hadoop-2 -Dufs.hadoop.version=2.9.0 -Pufs-hadoop-2 -Dufs.hadoop.version=2.10.0 -Pufs-hadoop-3 -Dufs.hadoop.version=3.0.0 --Pufs-hadoop-3 -Dufs.hadoop.version=3.3.1 +-Pufs-hadoop-3 -Dufs.hadoop.version=3.3.4 ``` {% endcollapsible %} diff --git a/docs/en/ufs/COSN.md b/docs/en/ufs/COSN.md index af26cec2ee77..88984a4f76c7 100644 --- a/docs/en/ufs/COSN.md +++ b/docs/en/ufs/COSN.md @@ -69,7 +69,7 @@ Specify COS configuration information in order to access COS by modifying `conf/ ``` -The above is the most basic configuration. For more configuration please refer to [here](https://hadoop.apache.org/docs/r3.3.1/hadoop-cos/cloud-storage/index.html). +The above is the most basic configuration. For more configuration please refer to [here](https://hadoop.apache.org/docs/r3.3.4/hadoop-cos/cloud-storage/index.html). After these changes, Alluxio should be configured to work with COSN as its under storage system and you can try [Running Alluxio Locally with COSN](#running-alluxio-locally-with-cosn). ### Nested Mount diff --git a/docs/en/ufs/HDFS.md b/docs/en/ufs/HDFS.md index 4a56d9e2f6e3..4e669958b45a 100644 --- a/docs/en/ufs/HDFS.md +++ b/docs/en/ufs/HDFS.md @@ -23,7 +23,7 @@ with the correct Hadoop version (recommended), or (for advanced users). Note that, when building Alluxio from source code, by default Alluxio server binaries are built to -work with Apache Hadoop HDFS of version `3.3.1`. To work with Hadoop distributions of other +work with Apache Hadoop HDFS of version `3.3.4`. 
To work with Hadoop distributions of other versions, one needs to specify the correct Hadoop profile and run the following in your Alluxio directory: @@ -159,7 +159,7 @@ alluxio.master.mount.table.root.option.alluxio.underfs.hdfs.configuration=/path/ To configure Alluxio to work with HDFS namenodes in HA mode, first configure Alluxio servers to [access HDFS with the proper configuration files](#specify-hdfs-configuration-location). In addition, set the under storage address to `hdfs://nameservice/` (`nameservice` is -the [HDFS nameservice](https://hadoop.apache.org/docs/r3.3.1/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html#Configuration_details) +the [HDFS nameservice](https://hadoop.apache.org/docs/r3.3.4/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html#Configuration_details) already configured in `hdfs-site.xml`). To mount an HDFS subdirectory to Alluxio instead of the whole HDFS namespace, change the under storage address to something like `hdfs://nameservice/alluxio/data`. @@ -176,11 +176,11 @@ HDFS is consistent with Alluxio (e.g., a file created by user Foo in Alluxio is HDFS also with owner as user Foo), the user to start Alluxio master and worker processes **is required** to be either: -1. [HDFS super user](http://hadoop.apache.org/docs/r3.3.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#The_Super-User). +1. [HDFS super user](http://hadoop.apache.org/docs/r3.3.4/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#The_Super-User). Namely, use the same user that starts HDFS namenode process to also start Alluxio master and worker processes. -2. A member of [HDFS superuser group](http://hadoop.apache.org/docs/r3.3.1/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#Configuration_Parameters). +2. A member of [HDFS superuser group](http://hadoop.apache.org/docs/r3.3.4/hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html#Configuration_Parameters). 
Edit HDFS configuration file `hdfs-site.xml` and check the value of configuration property `dfs.permissions.superusergroup`. If this property is set with a group (e.g., "hdfs"), add the user to start Alluxio process (e.g., "alluxio") to this group ("hdfs"); if this property is not diff --git a/integration/tools/pom.xml b/integration/tools/pom.xml index fe8918feea72..00e0d7b145a1 100644 --- a/integration/tools/pom.xml +++ b/integration/tools/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 false diff --git a/pom.xml b/pom.xml index 5e5df8848bd8..2fe7e7f159de 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ 2.8.9 4.1.87.Final 7.0.3 - 3.3.1 + 3.3.4 0.8.5 1.8 2.3.3 @@ -1411,7 +1411,7 @@ hadoop-3 - 3.3.1 + 3.3.4 @@ -1459,7 +1459,7 @@ - 3.3.1 + 3.3.4 diff --git a/shaded/hadoop/pom.xml b/shaded/hadoop/pom.xml index e592570a7b9f..43b89cff974d 100644 --- a/shaded/hadoop/pom.xml +++ b/shaded/hadoop/pom.xml @@ -27,7 +27,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 alluxio.shaded.hdfs ${project.artifactId}-${ufs.hadoop.version}-${project.version}.jar diff --git a/underfs/abfs/pom.xml b/underfs/abfs/pom.xml index 51fb584be0b8..cf80d7f506f6 100644 --- a/underfs/abfs/pom.xml +++ b/underfs/abfs/pom.xml @@ -27,7 +27,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 diff --git a/underfs/adl/pom.xml b/underfs/adl/pom.xml index 9df7954b47ac..2451b18c079a 100755 --- a/underfs/adl/pom.xml +++ b/underfs/adl/pom.xml @@ -25,7 +25,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 diff --git a/underfs/hdfs/pom.xml b/underfs/hdfs/pom.xml index f18eda19ede5..06054ffdf59c 100644 --- a/underfs/hdfs/pom.xml +++ b/underfs/hdfs/pom.xml @@ -26,7 +26,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 ${project.artifactId}-${ufs.hadoop.version}-${project.version}.jar diff --git a/underfs/wasb/pom.xml b/underfs/wasb/pom.xml index 4bbf9d85bf8a..45fa09e4d48f 100644 --- a/underfs/wasb/pom.xml +++ b/underfs/wasb/pom.xml @@ 
-25,7 +25,7 @@ ${project.parent.parent.basedir}/build - 3.3.1 + 3.3.4 From 297982bc0278dd0376078514da7344751bb35961 Mon Sep 17 00:00:00 2001 From: Haoning Sun Date: Sat, 4 Nov 2023 10:02:05 +0800 Subject: [PATCH 330/334] Use correct audit log configuration in proxy ### What changes are proposed in this pull request? The v1 and v2 versions of proxy use uniform judgment conditions. pr-link: Alluxio/alluxio#18371 change-id: cid-1842a0b96b5233f9a456e1aee7dbd95e2bd2d5e0 --- core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java | 2 +- .../src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java index 179447ede09e..7479d91f9e67 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3Handler.java @@ -369,7 +369,7 @@ public S3AuditContext createAuditContext(String command, @Nullable String object) { // Audit log may be enabled during runtime AsyncUserAccessAuditLogWriter auditLogWriter = null; - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { + if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { auditLogWriter = mAsyncAuditLogWriter; } S3AuditContext auditContext = new S3AuditContext(auditLogWriter); diff --git a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java index 465b0f074a4a..d897642e646c 100644 --- a/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java +++ b/core/server/proxy/src/main/java/alluxio/proxy/s3/S3RestServiceHandler.java @@ -1518,7 +1518,7 @@ private S3AuditContext createAuditContext(String command, String user, @Nullable String bucket, @Nullable String object) { // Audit log may be enabled during runtime 
AsyncUserAccessAuditLogWriter auditLogWriter = null; - if (Configuration.getBoolean(PropertyKey.MASTER_AUDIT_LOGGING_ENABLED)) { + if (Configuration.getBoolean(PropertyKey.PROXY_AUDIT_LOGGING_ENABLED)) { auditLogWriter = mAsyncAuditLogWriter; } S3AuditContext auditContext = new S3AuditContext(auditLogWriter); From 66ca4b63b9a5ab9fb79fafdbe6bc1845eb079450 Mon Sep 17 00:00:00 2001 From: Yichuan Sun Date: Mon, 6 Nov 2023 11:43:40 +0800 Subject: [PATCH 331/334] Fix getConf bugs when it comes across warning ### What changes are proposed in this pull request? As issue https://github.com/Alluxio/alluxio/issues/15795 mentioned. When output any warnings, the cli of judging HA_ENABLED will be invalid. This PR fixs it. ### Why are the changes needed? Bug fix. ### Does this PR introduce any user facing changes? No. pr-link: Alluxio/alluxio#18334 change-id: cid-c0d595d263788c5e64e6ce4508e2ee120b527b74 --- bin/alluxio-masters.sh | 15 ++++++++++++++- bin/alluxio-monitor.sh | 13 ++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/bin/alluxio-masters.sh b/bin/alluxio-masters.sh index e72cc693c820..73058d060733 100755 --- a/bin/alluxio-masters.sh +++ b/bin/alluxio-masters.sh @@ -32,8 +32,21 @@ ALLUXIO_TASK_LOG="${ALLUXIO_LOGS_DIR}/task.log" echo "Executing the following command on all master nodes and logging to ${ALLUXIO_TASK_LOG}: $@" | tee -a ${ALLUXIO_TASK_LOG} +check_true() { + local output=$1 + if [[ $output == *"true"* ]]; then + result="true" + else + result="false" + fi + echo $result +} + N=0 -HA_ENABLED=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + +HA_ENABLED_GETCONF_RES=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) +HA_ENABLED=$(check_true "$HA_ENABLED_GETCONF_RES") + JOURNAL_TYPE=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.master.journal.type | awk '{print toupper($0)}') if [[ ${JOURNAL_TYPE} == "EMBEDDED" ]]; then HA_ENABLED="true" diff --git 
a/bin/alluxio-monitor.sh b/bin/alluxio-monitor.sh index 7b00017523cb..5460a9d6e1d9 100755 --- a/bin/alluxio-monitor.sh +++ b/bin/alluxio-monitor.sh @@ -192,7 +192,8 @@ run_monitors() { # if there is an error, print the log tail for the remaining master nodes. batch_run_on_nodes "$(echo ${nodes})" "${BIN}/alluxio-monitor.sh" -L "${node_type}" else - HA_ENABLED=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + HA_ENABLED_GETCONF_RES=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.zookeeper.enabled) + HA_ENABLED=$(check_true "$HA_ENABLED_GETCONF_RES") JOURNAL_TYPE=$(${BIN}/alluxio getConf ${ALLUXIO_MASTER_JAVA_OPTS} alluxio.master.journal.type | awk '{print toupper($0)}') if [[ ${JOURNAL_TYPE} == "EMBEDDED" ]]; then HA_ENABLED="true" @@ -206,6 +207,16 @@ run_monitors() { fi } +check_true() { + local output=$1 + if [[ $output == *"true"* ]]; then + result="true" + else + result="false" + fi + echo $result +} + # Used to run a command on multiple hosts concurrently. # By default it limits concurrent tasks to 100. batch_run_on_nodes() { From 045a5117a983eeb00ec1ecd8b131320bb47d833e Mon Sep 17 00:00:00 2001 From: David Zhu Date: Mon, 6 Nov 2023 21:28:56 +0800 Subject: [PATCH 332/334] Add direct access option to avoid caching certain paths ### What changes are proposed in this pull request? Datalake formats such as Iceberg requires frequent changes to certain files, it is better not to cache them at all to avoid frequent invalidations. ### Why are the changes needed? for correct functionality using iceberg ### Does this PR introduce any user facing changes? 
one user property that is by default empty, alluxio.user.file.direct.access pr-link: Alluxio/alluxio#18326 change-id: cid-25a1ee3c2876a4a4126727113342c24d40b06126 --- .../alluxio/client/file/BaseFileSystem.java | 84 +++++--- .../alluxio/conf/ConfigurationBuilder.java | 0 .../alluxio/conf/OverlayConfiguration.java | 188 ++++++++++++++++++ .../main/java/alluxio/conf/PropertyKey.java | 8 + .../fs/DirectAccessIntegrationTest.java | 108 ++++++++++ 5 files changed, 364 insertions(+), 24 deletions(-) rename core/common/src/{test => main}/java/alluxio/conf/ConfigurationBuilder.java (100%) create mode 100644 core/common/src/main/java/alluxio/conf/OverlayConfiguration.java create mode 100644 tests/src/test/java/alluxio/client/fs/DirectAccessIntegrationTest.java diff --git a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java index e72c027b00f8..1c4cfe421091 100644 --- a/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java +++ b/core/client/fs/src/main/java/alluxio/client/file/BaseFileSystem.java @@ -16,12 +16,16 @@ import alluxio.AlluxioURI; import alluxio.Constants; +import alluxio.client.ReadType; +import alluxio.client.WriteType; import alluxio.client.block.BlockStoreClient; import alluxio.client.block.BlockWorkerInfo; import alluxio.client.file.FileSystemContextReinitializer.ReinitBlockerResource; import alluxio.client.file.options.InStreamOptions; import alluxio.client.file.options.OutStreamOptions; import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.ConfigurationBuilder; +import alluxio.conf.OverlayConfiguration; import alluxio.conf.PropertyKey; import alluxio.exception.AlluxioException; import alluxio.exception.DirectoryNotEmptyException; @@ -69,6 +73,7 @@ import alluxio.security.authorization.AclEntry; import alluxio.uri.Authority; import alluxio.util.FileSystemOptionsUtils; +import alluxio.util.io.PathUtils; import alluxio.wire.BlockLocation; import 
alluxio.wire.BlockLocationInfo; import alluxio.wire.FileBlockInfo; @@ -99,11 +104,17 @@ */ @ThreadSafe public class BaseFileSystem implements FileSystem { + private static final AlluxioConfiguration DIRECT_ACCESS_CONF = new ConfigurationBuilder() + .setProperty(PropertyKey.USER_FILE_METADATA_SYNC_INTERVAL, "0") + .setProperty(PropertyKey.USER_FILE_READ_TYPE_DEFAULT, ReadType.NO_CACHE) + .setProperty(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.THROUGH).build(); private static final Logger LOG = LoggerFactory.getLogger(BaseFileSystem.class); + /** Used to manage closeable resources. */ private final Closer mCloser = Closer.create(); protected final FileSystemContext mFsContext; protected final BlockStoreClient mBlockStore; + protected List mPathList; protected volatile boolean mClosed = false; @@ -149,7 +160,7 @@ public void checkAccess(AlluxioURI path, CheckAccessPOptions options) checkUri(path); rpc(client -> { CheckAccessPOptions mergedOptions = FileSystemOptionsUtils - .checkAccessDefaults(mFsContext.getPathConf(path)) + .checkAccessDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); client.checkAccess(path, mergedOptions); LOG.debug("Checked access {}, options: {}", path.getPath(), mergedOptions); @@ -157,13 +168,38 @@ public void checkAccess(AlluxioURI path, CheckAccessPOptions options) }); } + private boolean checkDirectAccess(AlluxioURI uri) { + if (!getConf().isSet(PropertyKey.USER_FILE_DIRECT_ACCESS)) { + return false; + } + if (mPathList == null) { + mPathList = getConf().getList(PropertyKey.USER_FILE_DIRECT_ACCESS); + } + return mPathList.stream().anyMatch(x -> { + try { + return PathUtils.hasPrefix(uri.getPath(), x); + } catch (InvalidPathException e) { + return false; + } + }); + } + + private AlluxioConfiguration getDirectAccessConf(AlluxioURI uri) { + AlluxioConfiguration inner = mFsContext.getPathConf(uri); + if (checkDirectAccess(uri)) { + return new OverlayConfiguration(DIRECT_ACCESS_CONF, inner); + } else { + 
return inner; + } + } + @Override public void createDirectory(AlluxioURI path, CreateDirectoryPOptions options) throws FileAlreadyExistsException, InvalidPathException, IOException, AlluxioException { checkUri(path); rpc(client -> { CreateDirectoryPOptions mergedOptions = FileSystemOptionsUtils.createDirectoryDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.createDirectory(path, mergedOptions); LOG.debug("Created directory {}, options: {}", path.getPath(), mergedOptions); return null; @@ -171,17 +207,17 @@ public void createDirectory(AlluxioURI path, CreateDirectoryPOptions options) } @Override - public FileOutStream createFile(AlluxioURI path, CreateFilePOptions options) + public FileOutStream createFile(AlluxioURI path, final CreateFilePOptions options) throws FileAlreadyExistsException, InvalidPathException, IOException, AlluxioException { checkUri(path); return rpc(client -> { CreateFilePOptions mergedOptions = FileSystemOptionsUtils.createFileDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); URIStatus status = client.createFile(path, mergedOptions); LOG.debug("Created file {}, options: {}", path.getPath(), mergedOptions); OutStreamOptions outStreamOptions = new OutStreamOptions(mergedOptions, mFsContext, - mFsContext.getPathConf(path)); + getDirectAccessConf(path)); outStreamOptions.setUfsPath(status.getUfsPath()); outStreamOptions.setMountId(status.getMountId()); outStreamOptions.setAcl(status.getAcl()); @@ -200,7 +236,7 @@ public void delete(AlluxioURI path, DeletePOptions options) checkUri(path); rpc(client -> { DeletePOptions mergedOptions = FileSystemOptionsUtils.deleteDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.delete(path, 
mergedOptions); LOG.debug("Deleted {}, options: {}", path.getPath(), mergedOptions); return null; @@ -213,7 +249,7 @@ public boolean exists(AlluxioURI path, final ExistsPOptions options) checkUri(path); return rpc(client -> { ExistsPOptions mergedOptions = FileSystemOptionsUtils.existsDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.exists(path, mergedOptions); }); } @@ -223,7 +259,7 @@ public void free(AlluxioURI path, final FreePOptions options) throws FileDoesNotExistException, IOException, AlluxioException { checkUri(path); rpc(client -> { - FreePOptions mergedOptions = FileSystemOptionsUtils.freeDefaults(mFsContext.getPathConf(path)) + FreePOptions mergedOptions = FileSystemOptionsUtils.freeDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); client.free(path, mergedOptions); LOG.debug("Freed {}, options: {}", path.getPath(), mergedOptions); @@ -279,7 +315,7 @@ public URIStatus getStatus(AlluxioURI path, final GetStatusPOptions options) checkUri(path); URIStatus status = rpc(client -> { GetStatusPOptions mergedOptions = FileSystemOptionsUtils.getStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.getStatus(path, mergedOptions); }); if (!status.isCompleted()) { @@ -295,7 +331,7 @@ public List listStatus(AlluxioURI path, final ListStatusPOptions opti return rpc(client -> { // TODO(calvin): Fix the exception handling in the master ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.listStatus(path, mergedOptions); }); } @@ -308,7 +344,7 @@ public void iterateStatus(AlluxioURI path, final ListStatusPOptions options, rpc(client -> { 
// TODO(calvin): Fix the exception handling in the master ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.iterateStatus(path, mergedOptions, action); return null; }); @@ -321,7 +357,7 @@ public ListStatusPartialResult listStatusPartial( checkUri(path); return rpc(client -> { ListStatusPartialPOptions mergedOptions = FileSystemOptionsUtils.listStatusPartialDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); return client.listStatusPartial(path, mergedOptions); }); } @@ -332,7 +368,7 @@ public void loadMetadata(AlluxioURI path, final ListStatusPOptions options) checkUri(path); rpc(client -> { ListStatusPOptions mergedOptions = FileSystemOptionsUtils.listStatusDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options) + getDirectAccessConf(path)).toBuilder().mergeFrom(options) .setLoadMetadataType(LoadMetadataPType.ALWAYS).setLoadMetadataOnly(true).build(); client.listStatus(path, mergedOptions); return null; @@ -384,7 +420,7 @@ public void persist(final AlluxioURI path, final ScheduleAsyncPersistencePOption rpc(client -> { ScheduleAsyncPersistencePOptions mergedOptions = FileSystemOptionsUtils - .scheduleAsyncPersistDefaults(mFsContext.getPathConf(path)).toBuilder() + .scheduleAsyncPersistDefaults(getDirectAccessConf(path)).toBuilder() .mergeFrom(options).build(); client.scheduleAsyncPersist(path, mergedOptions); LOG.debug("Scheduled persist for {}, options: {}", path.getPath(), mergedOptions); @@ -397,12 +433,12 @@ public FileInStream openFile(AlluxioURI path, OpenFilePOptions options) throws FileDoesNotExistException, OpenDirectoryException, FileIncompleteException, IOException, AlluxioException { checkUri(path); - AlluxioConfiguration conf = mFsContext.getPathConf(path); - 
URIStatus status = getStatus(path, - FileSystemOptionsUtils.getStatusDefaults(conf).toBuilder() - .setAccessMode(Bits.READ) - .setUpdateTimestamps(options.getUpdateLastAccessTime()) - .build()); + AlluxioConfiguration conf = getDirectAccessConf(path); + GetStatusPOptions opt = FileSystemOptionsUtils.getStatusDefaults(conf) + .toBuilder() + .setAccessMode(Bits.READ) + .setUpdateTimestamps(options.getUpdateLastAccessTime()).build(); + URIStatus status = getStatus(path, opt); return openFile(status, options); } @@ -417,7 +453,7 @@ public FileInStream openFile(URIStatus status, OpenFilePOptions options) if (!status.isCompleted()) { throw new FileIncompleteException(path); } - AlluxioConfiguration conf = mFsContext.getPathConf(path); + AlluxioConfiguration conf = getDirectAccessConf(path); OpenFilePOptions mergedOptions = FileSystemOptionsUtils.openFileDefaults(conf) .toBuilder().mergeFrom(options).build(); InStreamOptions inStreamOptions = new InStreamOptions(status, mergedOptions, conf, mFsContext); @@ -455,7 +491,7 @@ public void setAcl(AlluxioURI path, SetAclAction action, List entries, checkUri(path); rpc(client -> { SetAclPOptions mergedOptions = FileSystemOptionsUtils.setAclDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.setAcl(path, action, entries, mergedOptions); LOG.debug("Set ACL for {}, entries: {} options: {}", path.getPath(), entries, mergedOptions); @@ -468,7 +504,7 @@ public void setAttribute(AlluxioURI path, SetAttributePOptions options) throws FileDoesNotExistException, IOException, AlluxioException { checkUri(path); SetAttributePOptions mergedOptions = - FileSystemOptionsUtils.setAttributeClientDefaults(mFsContext.getPathConf(path)) + FileSystemOptionsUtils.setAttributeClientDefaults(getDirectAccessConf(path)) .toBuilder().mergeFrom(options).build(); rpc(client -> { client.setAttribute(path, mergedOptions); @@ -512,7 +548,7 @@ public void 
unmount(AlluxioURI path, UnmountPOptions options) checkUri(path); rpc(client -> { UnmountPOptions mergedOptions = FileSystemOptionsUtils.unmountDefaults( - mFsContext.getPathConf(path)).toBuilder().mergeFrom(options).build(); + getDirectAccessConf(path)).toBuilder().mergeFrom(options).build(); client.unmount(path); LOG.debug("Unmounted {}, options: {}", path.getPath(), mergedOptions); return null; diff --git a/core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java b/core/common/src/main/java/alluxio/conf/ConfigurationBuilder.java similarity index 100% rename from core/common/src/test/java/alluxio/conf/ConfigurationBuilder.java rename to core/common/src/main/java/alluxio/conf/ConfigurationBuilder.java diff --git a/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java b/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java new file mode 100644 index 000000000000..4b3cd1df16a1 --- /dev/null +++ b/core/common/src/main/java/alluxio/conf/OverlayConfiguration.java @@ -0,0 +1,188 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package alluxio.conf; + +import com.google.common.collect.ImmutableMap; + +import java.time.Duration; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.annotation.concurrent.ThreadSafe; + +/** + * Configuration that wraps around another configuration + * Priority for the value of a property follows: + * if a property has been set by user on the outer configuration, it takes priority + * if it is not set explicitly on the outer configuration, the inner configuration + * determines the value. + */ +@ThreadSafe +public class OverlayConfiguration implements AlluxioConfiguration { + /** + * Runtime level configuration. + */ + private final AlluxioConfiguration mOuterConf; + /** + * Default configuration. + */ + private final AlluxioConfiguration mInnerConf; + private final Set mKeySet; + private final Set mUserKeySet; + + /** + * Constructs a new instance with the specified references without copying the underlying + * properties. + * + * @param outerConf the runtime level configuration to override + * @param innerConf the default configuration + */ + public OverlayConfiguration(AlluxioConfiguration outerConf, + AlluxioConfiguration innerConf) { + mOuterConf = outerConf; + mInnerConf = innerConf; + mUserKeySet = new HashSet<>(); + mUserKeySet.addAll(outerConf.userKeySet()); + mUserKeySet.addAll(innerConf.userKeySet()); + mKeySet = new HashSet<>(); + mKeySet.addAll(innerConf.keySet()); + mKeySet.addAll(outerConf.keySet()); + } + + private AlluxioConfiguration conf(PropertyKey key) { + return mOuterConf.isSetByUser(key) ? 
mOuterConf : mInnerConf; + } + + @Override + public Object get(PropertyKey key) { + return conf(key).get(key); + } + + @Override + public Object get(PropertyKey key, ConfigurationValueOptions options) { + return conf(key).get(key, options); + } + + @Override + public boolean isSet(PropertyKey key) { + return conf(key).isSet(key); + } + + @Override + public boolean isSetByUser(PropertyKey key) { + return conf(key).isSetByUser(key); + } + + @Override + public Set keySet() { + return mKeySet; + } + + @Override + public Set userKeySet() { + return mUserKeySet; + } + + @Override + public String getString(PropertyKey key) { + return conf(key).getString(key); + } + + @Override + public int getInt(PropertyKey key) { + return conf(key).getInt(key); + } + + @Override + public long getLong(PropertyKey key) { + return conf(key).getLong(key); + } + + @Override + public double getDouble(PropertyKey key) { + return conf(key).getDouble(key); + } + + @Override + public boolean getBoolean(PropertyKey key) { + return conf(key).getBoolean(key); + } + + @Override + public List getList(PropertyKey key) { + return conf(key).getList(key); + } + + @Override + public > T getEnum(PropertyKey key, Class enumType) { + return conf(key).getEnum(key, enumType); + } + + @Override + public long getBytes(PropertyKey key) { + return conf(key).getBytes(key); + } + + @Override + public long getMs(PropertyKey key) { + return conf(key).getMs(key); + } + + @Override + public Duration getDuration(PropertyKey key) { + return conf(key).getDuration(key); + } + + @Override + public Class getClass(PropertyKey key) { + return conf(key).getClass(key); + } + + @Override + public Map getNestedProperties(PropertyKey prefixKey) { + return conf(prefixKey).getNestedProperties(prefixKey); + } + + @Override + public AlluxioProperties copyProperties() { + AlluxioProperties properties = mInnerConf.copyProperties(); + for (PropertyKey key : mOuterConf.userKeySet()) { + properties.put(key, mOuterConf.get(key), 
Source.RUNTIME); + } + return properties; + } + + @Override + public Source getSource(PropertyKey key) { + return conf(key).getSource(key); + } + + @Override + public Map toMap(ConfigurationValueOptions opts) { + ImmutableMap.Builder map = ImmutableMap.builder(); + // Cannot use Collectors.toMap because we support null keys. + keySet().forEach(key -> + map.put(key.getName(), conf(key).getOrDefault(key, null, opts))); + return map.build(); + } + + @Override + public void validate() { + new InstancedConfiguration(copyProperties()).validate(); + } + + @Override + public boolean clusterDefaultsLoaded() { + return mInnerConf.clusterDefaultsLoaded(); + } +} diff --git a/core/common/src/main/java/alluxio/conf/PropertyKey.java b/core/common/src/main/java/alluxio/conf/PropertyKey.java index 8f7db1b06fa3..c0f64cee8913 100755 --- a/core/common/src/main/java/alluxio/conf/PropertyKey.java +++ b/core/common/src/main/java/alluxio/conf/PropertyKey.java @@ -5833,6 +5833,12 @@ public String toString() { + "before attempting to delete persisted directories recursively.") .setScope(Scope.CLIENT) .build(); + public static final PropertyKey USER_FILE_DIRECT_ACCESS = + listBuilder(Name.USER_FILE_DIRECT_ACCESS) + .setDescription("A list of Alluxio paths that are not read or write cached and " + + "always fetches from the ufs for the latest listing") + .setScope(Scope.CLIENT) + .build(); public static final PropertyKey USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN = intBuilder(Name.USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN) .setDefaultValue(0) @@ -8920,6 +8926,8 @@ public static final class Name { "alluxio.user.file.copyfromlocal.block.location.policy.class"; public static final String USER_FILE_DELETE_UNCHECKED = "alluxio.user.file.delete.unchecked"; + public static final String USER_FILE_DIRECT_ACCESS = + "alluxio.user.file.direct.access"; public static final String USER_FILE_MASTER_CLIENT_POOL_SIZE_MIN = "alluxio.user.file.master.client.pool.size.min"; public static final String 
USER_FILE_MASTER_CLIENT_POOL_SIZE_MAX = diff --git a/tests/src/test/java/alluxio/client/fs/DirectAccessIntegrationTest.java b/tests/src/test/java/alluxio/client/fs/DirectAccessIntegrationTest.java new file mode 100644 index 000000000000..e557eadbc31b --- /dev/null +++ b/tests/src/test/java/alluxio/client/fs/DirectAccessIntegrationTest.java @@ -0,0 +1,108 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.client.fs; + +import alluxio.AlluxioTestDirectory; +import alluxio.AlluxioURI; +import alluxio.client.file.FileInStream; +import alluxio.client.file.FileOutStream; +import alluxio.client.file.FileSystem; +import alluxio.client.file.FileSystemUtils; +import alluxio.conf.PropertyKey; +import alluxio.grpc.CreateFilePOptions; +import alluxio.testutils.BaseIntegrationTest; +import alluxio.testutils.LocalAlluxioClusterResource; + +import com.google.common.io.ByteStreams; +import org.apache.commons.io.IOUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import java.io.File; + +/** + * Integration tests for direct access configurations. 
+ */ +public class DirectAccessIntegrationTest extends BaseIntegrationTest { + private static final byte[] TEST_BYTES = "TestBytes".getBytes(); + private static final int USER_QUOTA_UNIT_BYTES = 1000; + private static final String DIRECT_DIR = "/mnt/direct/"; + private static final String NON_DIRECT_DIR = "/mnt/non_direct/"; + + @Rule + public LocalAlluxioClusterResource mLocalAlluxioClusterResource = + new LocalAlluxioClusterResource.Builder() + .setProperty(PropertyKey.USER_FILE_BUFFER_BYTES, USER_QUOTA_UNIT_BYTES) + .setProperty(PropertyKey.USER_FILE_DIRECT_ACCESS, DIRECT_DIR) + .build(); + private FileSystem mFileSystem; + + private final String mLocalUfsPath = AlluxioTestDirectory + .createTemporaryDirectory("DirectAccessIntegrationTest").getAbsolutePath(); + + @Before + public void before() throws Exception { + mFileSystem = mLocalAlluxioClusterResource.get().getClient(); + mFileSystem.mount(new AlluxioURI("/mnt/"), new AlluxioURI(mLocalUfsPath)); + } + + @Test + public void writeDirect() throws Exception { + final int n = 3; + for (int i = 0; i < n; i++) { + AlluxioURI uri = new AlluxioURI(DIRECT_DIR + i); + try (FileOutStream os = mFileSystem.createFile(uri, + CreateFilePOptions.newBuilder().setRecursive(true).build())) { + os.write(TEST_BYTES); + } + } + Assert.assertEquals(n, new File(mLocalUfsPath + "/direct").listFiles().length); + for (int i = 0; i < n; i++) { + checkCacheStatus(DIRECT_DIR + i, false, false); + } + } + + @Test + public void writeNonDirect() throws Exception { + final int n = 3; + for (int i = 0; i < n; i++) { + AlluxioURI uri = new AlluxioURI(NON_DIRECT_DIR + i); + try (FileOutStream os = mFileSystem.createFile(uri, + CreateFilePOptions.newBuilder().setRecursive(true).build())) { + os.write(TEST_BYTES); + } + } + Assert.assertNull(new File(mLocalUfsPath + "/non_direct").listFiles()); + + for (int i = 0; i < n; i++) { + checkCacheStatus(NON_DIRECT_DIR + i, true, true); + } + } + + private void checkCacheStatus(String path, + boolean 
shouldCacheBefore, boolean shouldCache) throws Exception { + AlluxioURI uri = new AlluxioURI(path); + Assert.assertEquals(shouldCacheBefore ? 100 : 0, + mFileSystem.getStatus(uri).getInMemoryPercentage()); + try (FileInStream is = mFileSystem.openFile(uri)) { + IOUtils.copy(is, ByteStreams.nullOutputStream()); + } + // Find the block location directly from block info to determine + // if the file has been cached + Assert.assertTrue(shouldCache ^ mFileSystem.getBlockLocations(uri) + .get(0).getBlockInfo().getBlockInfo().getLocations().isEmpty()); + FileSystemUtils.waitForAlluxioPercentage(mFileSystem, uri, shouldCache ? 100 : 0); + Assert.assertEquals(shouldCache ? 100 : 0, mFileSystem.getStatus(uri).getInMemoryPercentage()); + } +} From 5de2b54cb95435d2b208457bb74335a63fb6c9a5 Mon Sep 17 00:00:00 2001 From: Bowen Ding <6999708+dbw9580@users.noreply.github.com> Date: Tue, 7 Nov 2023 14:22:30 +0800 Subject: [PATCH 333/334] fix empty block location --- .../main/java/alluxio/worker/page/PagedBlockStore.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java index 59ac5af9daeb..7fd540fb63f7 100644 --- a/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java +++ b/core/server/worker/src/main/java/alluxio/worker/page/PagedBlockStore.java @@ -389,15 +389,7 @@ public CacheManager.State getCacheManagerState() { public void moveBlock(long sessionId, long blockId, AllocateOptions moveOptions) throws IOException { // TODO(bowen): implement actual move and replace placeholder values - int dirIndex = getDirIndexOfBlock(blockId); - BlockStoreLocation srcLocation = new BlockStoreLocation(DEFAULT_TIER, dirIndex); - BlockStoreLocation destLocation = moveOptions.getLocation(); - for (BlockStoreEventListener listener : mBlockStoreEventListeners) { - synchronized (listener) { - 
listener.onMoveBlockByClient(blockId, srcLocation, destLocation); - } - } - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("moveBlock"); } @Override From 3def52daf5f58387bb8281cd802ee132b5772c7c Mon Sep 17 00:00:00 2001 From: pkuweblab Date: Thu, 16 Nov 2023 14:41:37 +0800 Subject: [PATCH 334/334] add AlluxioCosException --- .../underfs/cos/AlluxioCosException.java | 100 ++++++++++++++++++ .../underfs/cos/COSUnderFileSystem.java | 5 +- 2 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 underfs/cos/src/main/java/alluxio/underfs/cos/AlluxioCosException.java diff --git a/underfs/cos/src/main/java/alluxio/underfs/cos/AlluxioCosException.java b/underfs/cos/src/main/java/alluxio/underfs/cos/AlluxioCosException.java new file mode 100644 index 000000000000..dd9187b3b151 --- /dev/null +++ b/underfs/cos/src/main/java/alluxio/underfs/cos/AlluxioCosException.java @@ -0,0 +1,100 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package alluxio.underfs.cos; + +import alluxio.exception.runtime.AlluxioRuntimeException; +import alluxio.grpc.ErrorType; + +import com.qcloud.cos.exception.CosClientException; +import com.qcloud.cos.exception.CosServiceException; +import io.grpc.Status; + +import java.net.HttpURLConnection; + +/** + * Alluxio exception for cos. 
+ */ +public class AlluxioCosException extends AlluxioRuntimeException { + private static final ErrorType ERROR_TYPE = ErrorType.External; + + /** + * Converts a CosClientException to a corresponding AlluxioCosException. + * @param cause cos exception + * @return alluxio cos exception + */ + public static AlluxioCosException from(CosClientException cause) { + return from(null, cause); + } + + /** + * Converts a CosClientException with an error message to a corresponding AlluxioCosException. + * @param errorMessage error message + * @param cause cos exception + * @return alluxio cos exception + */ + public static AlluxioCosException from(String errorMessage, CosClientException cause) { + Status status = Status.UNKNOWN; + String errorDescription = "ClientException:" + cause.getMessage(); + if (cause instanceof CosServiceException) { + CosServiceException exception = (CosServiceException) cause; + status = httpStatusToGrpcStatus(exception.getStatusCode()); + errorDescription = exception.getErrorCode() + ":" + exception.getErrorMessage(); + } + if (errorMessage == null) { + errorMessage = errorDescription; + } + return new AlluxioCosException(status, errorMessage, cause, cause.isRetryable()); + } + + private AlluxioCosException(Status status, String message, Throwable cause, boolean isRetryAble) { + super(status, message, cause, ERROR_TYPE, isRetryAble); + } + + private static Status httpStatusToGrpcStatus(int httpStatusCode) { + if (httpStatusCode >= 100 && httpStatusCode < 200) { + // 1xx. These headers should have been ignored. 
+ return Status.INTERNAL; + } + switch (httpStatusCode) { + case HttpURLConnection.HTTP_BAD_REQUEST: // 400 + return Status.INVALID_ARGUMENT; + case HttpURLConnection.HTTP_UNAUTHORIZED: // 401 + return Status.UNAUTHENTICATED; + case HttpURLConnection.HTTP_FORBIDDEN: // 403 + return Status.PERMISSION_DENIED; + case HttpURLConnection.HTTP_NOT_FOUND: // 404 + return Status.NOT_FOUND; + case HttpURLConnection.HTTP_BAD_METHOD: // 405 + case HttpURLConnection.HTTP_NOT_IMPLEMENTED: // 501 + return Status.UNIMPLEMENTED; + case HttpURLConnection.HTTP_CONFLICT: // 409 + return Status.ABORTED; + case HttpURLConnection.HTTP_LENGTH_REQUIRED: // 411 + case HttpURLConnection.HTTP_PRECON_FAILED: // 412 + return Status.FAILED_PRECONDITION; + case 416: // Requested Range Not Satisfiable + return Status.OUT_OF_RANGE; + case HttpURLConnection.HTTP_INTERNAL_ERROR: //500 + return Status.INTERNAL; + case HttpURLConnection.HTTP_MOVED_PERM: // 301 + case HttpURLConnection.HTTP_NOT_MODIFIED: //304 + case 307: // Moved Temporarily + case HttpURLConnection.HTTP_BAD_GATEWAY: // 502 + case HttpURLConnection.HTTP_UNAVAILABLE: // 503 + return Status.UNAVAILABLE; + case HttpURLConnection.HTTP_GATEWAY_TIMEOUT: // 504 + return Status.DEADLINE_EXCEEDED; + default: + return Status.UNKNOWN; + } + } +} diff --git a/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java b/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java index 14c965c0ef0b..7f589cfd082e 100644 --- a/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java +++ b/underfs/cos/src/main/java/alluxio/underfs/cos/COSUnderFileSystem.java @@ -180,7 +180,8 @@ protected List deleteObjects(List keys) throws IOException { .map(DeleteObjectsResult.DeletedObject::getKey) .collect(Collectors.toList()); } catch (CosClientException e) { - throw new IOException("failed to delete objects", e); + LOG.warn("failed to delete objects"); + throw AlluxioCosException.from(e); } } @@ -324,7 +325,7 @@ protected 
InputStream openObject(String key, OpenOptions options, return new COSInputStream(mBucketNameInternal, key, mClient, options.getOffset(), retryPolicy, mUfsConf.getBytes(PropertyKey.UNDERFS_OBJECT_STORE_MULTI_RANGE_CHUNK_SIZE)); } catch (CosClientException e) { - throw new IOException(e.getMessage()); + throw AlluxioCosException.from(e); } } }