diff --git a/pom.xml b/pom.xml index 0e6955030c..1369e75b24 100644 --- a/pom.xml +++ b/pom.xml @@ -69,6 +69,7 @@ 2.16.0 2.6 ${user.home}/clover.license + 5.9.0 3.2.0 3.8.1 4.9.3 @@ -736,6 +737,11 @@ + + org.apache.hadoop + hadoop-registry + ${hadoop.version} + org.apache.hadoop hadoop-hdfs @@ -743,6 +749,18 @@ test-jar test + + org.apache.curator + curator-test + ${curator.version} + test + + + org.junit.jupiter + junit-jupiter-api + + + org.mockito mockito-core diff --git a/tez-api/findbugs-exclude.xml b/tez-api/findbugs-exclude.xml index de8f3824cd..25d41cd95b 100644 --- a/tez-api/findbugs-exclude.xml +++ b/tez-api/findbugs-exclude.xml @@ -151,4 +151,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tez-api/pom.xml b/tez-api/pom.xml index ff1f9d2e6a..9e889bdd39 100644 --- a/tez-api/pom.xml +++ b/tez-api/pom.xml @@ -76,7 +76,6 @@ org.apache.hadoop hadoop-registry - ${hadoop.version} org.apache.commons @@ -124,6 +123,11 @@ org.xerial.snappy snappy-java + + org.apache.curator + curator-test + test + diff --git a/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java b/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java index 9b7a32b61f..dac20edc08 100644 --- a/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/FrameworkClient.java @@ -26,7 +26,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.tez.common.RPCUtil; @@ -46,6 +45,9 @@ import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.ShutdownSessionRequestProto; import org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGRequestProto; import 
org.apache.tez.dag.api.client.rpc.DAGClientAMProtocolRPC.SubmitDAGResponseProto; +import org.apache.tez.frameworkplugins.ClientFrameworkService; +import org.apache.tez.frameworkplugins.FrameworkUtils; +import org.apache.tez.frameworkplugins.yarn.YarnClientFrameworkService; import com.google.protobuf.ServiceException; @@ -57,7 +59,6 @@ public abstract class FrameworkClient { protected static final Logger LOG = LoggerFactory.getLogger(FrameworkClient.class); public static FrameworkClient createFrameworkClient(TezConfiguration tezConf) { - boolean isLocal = tezConf.getBoolean(TezConfiguration.TEZ_LOCAL_MODE, TezConfiguration.TEZ_LOCAL_MODE_DEFAULT); if (isLocal) { try { @@ -65,8 +66,11 @@ public static FrameworkClient createFrameworkClient(TezConfiguration tezConf) { } catch (TezReflectionException e) { throw new TezUncheckedException("Fail to create LocalClient", e); } + } else { + ClientFrameworkService clientFrameworkService = FrameworkUtils.get(ClientFrameworkService.class, tezConf, + YarnClientFrameworkService.class); + return clientFrameworkService.newFrameworkClient(); } - return new TezYarnClient(YarnClient.createYarnClient()); } /** diff --git a/tez-api/src/main/java/org/apache/tez/client/TezClient.java b/tez-api/src/main/java/org/apache/tez/client/TezClient.java index e6c852a1a3..8c16f6be8c 100644 --- a/tez-api/src/main/java/org/apache/tez/client/TezClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezClient.java @@ -484,7 +484,9 @@ public synchronized TezClient getClient(ApplicationId appId) throws TezException } private void startFrameworkClient() { - frameworkClient = createFrameworkClient(); + if (frameworkClient == null) { + frameworkClient = createFrameworkClient(); + } frameworkClient.init(amConfig.getTezConfiguration()); frameworkClient.start(); } diff --git a/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java b/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java index d109648075..eed5129610 100644 --- 
a/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java +++ b/tez-api/src/main/java/org/apache/tez/client/TezYarnClient.java @@ -43,7 +43,7 @@ public class TezYarnClient extends FrameworkClient { private String amHost; private int amPort; - protected TezYarnClient(YarnClient yarnClient) { + public TezYarnClient(YarnClient yarnClient) { this.yarnClient = yarnClient; } diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/AMRecord.java b/tez-api/src/main/java/org/apache/tez/client/registry/AMRecord.java index 8453f9836c..4fadbf9ed7 100644 --- a/tez-api/src/main/java/org/apache/tez/client/registry/AMRecord.java +++ b/tez-api/src/main/java/org/apache/tez/client/registry/AMRecord.java @@ -19,10 +19,12 @@ package org.apache.tez.client.registry; import java.util.Objects; +import java.util.Optional; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.registry.client.types.ServiceRecord; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.registry.zookeeper.ZkConfig; /** @@ -37,14 +39,20 @@ @InterfaceAudience.Public public class AMRecord { private static final String APP_ID_RECORD_KEY = "appId"; - private static final String HOST_RECORD_KEY = "host"; + private static final String HOST_NAME_RECORD_KEY = "hostName"; + private static final String HOST_IP_RECORD_KEY = "hostIp"; private static final String PORT_RECORD_KEY = "port"; - private static final String OPAQUE_ID_KEY = "id"; + private static final String EXTERNAL_ID_KEY = "externalId"; + private static final String COMPUTE_GROUP_NAME_KEY = "computeName"; private final ApplicationId appId; - private final String host; + private final String hostName; + private final String hostIp; private final int port; - private final String id; + private final String externalId; + private final String computeName; + + private ServiceRecord serviceRecord; /** * Creates a new {@code AMRecord} with the given application ID, host, port, and identifier. 
@@ -54,17 +62,23 @@ public class AMRecord { * Although this constructor may not be used directly within Tez internals, * it is part of the public API for Tez clients that handle unmanaged sessions. * - * @param appId the {@link ApplicationId} of the Tez application - * @param host the hostname where the Application Master is running - * @param port the port number on which the Application Master is listening - * @param id an opaque identifier for the record; if {@code null}, defaults to an empty string + * @param appId the {@link ApplicationId} of the Tez application + * @param hostName the hostname where the Application Master is running + * @param hostIp the IP address of the Application Master host + * @param port the RPC port number on which the Application Master is listening + * @param externalId an optional external identifier for the record; if {@code null}, defaults to an empty string + * @param computeName the compute group or cluster name; if {@code null}, + * defaults to {@link ZkConfig#DEFAULT_COMPUTE_GROUP_NAME} */ - public AMRecord(ApplicationId appId, String host, int port, String id) { + public AMRecord(ApplicationId appId, String hostName, String hostIp, int port, String externalId, + String computeName) { this.appId = appId; - this.host = host; + this.hostName = hostName; + this.hostIp = hostIp; this.port = port; - //If id is not provided, convert to empty string - this.id = (id == null) ? 
"" : id; + //externalId is optional, if not provided, convert to empty string + this.externalId = Optional.ofNullable(externalId).orElse(""); + this.computeName = Optional.ofNullable(computeName).orElse(ZkConfig.DEFAULT_COMPUTE_GROUP_NAME); } /** @@ -78,10 +92,15 @@ public AMRecord(ApplicationId appId, String host, int port, String id) { * @param other the {@code AMRecord} instance to copy */ public AMRecord(AMRecord other) { - this.appId = other.getApplicationId(); - this.host = other.getHost(); - this.port = other.getPort(); - this.id = other.getId(); + this.appId = other.appId; + this.hostName = other.hostName; + this.hostIp = other.hostIp; + this.port = other.port; + this.externalId = other.externalId; + this.computeName = other.computeName; + // all fields are final immutable, we can copy the serviceRecord, + // if it's initialized there already, as it won't change + this.serviceRecord = other.serviceRecord; } /** @@ -97,25 +116,35 @@ public AMRecord(AMRecord other) { */ public AMRecord(ServiceRecord serviceRecord) { this.appId = ApplicationId.fromString(serviceRecord.get(APP_ID_RECORD_KEY)); - this.host = serviceRecord.get(HOST_RECORD_KEY); + this.hostName = serviceRecord.get(HOST_NAME_RECORD_KEY); + this.hostIp = serviceRecord.get(HOST_IP_RECORD_KEY); this.port = Integer.parseInt(serviceRecord.get(PORT_RECORD_KEY)); - this.id = serviceRecord.get(OPAQUE_ID_KEY); + this.externalId = serviceRecord.get(EXTERNAL_ID_KEY); + this.computeName = serviceRecord.get(COMPUTE_GROUP_NAME_KEY); } public ApplicationId getApplicationId() { return appId; } - public String getHost() { - return host; + public String getHostName() { + return hostName; + } + + public String getHostIp() { + return hostIp; } public int getPort() { return port; } - public String getId() { - return id; + public String getExternalId() { + return externalId; + } + + public String getComputeName() { + return computeName; } @Override @@ -125,9 +154,11 @@ public boolean equals(Object other) { } if (other 
instanceof AMRecord otherRecord) { return appId.equals(otherRecord.appId) - && host.equals(otherRecord.host) + && hostName.equals(otherRecord.hostName) + && hostIp.equals(otherRecord.hostIp) && port == otherRecord.port - && id.equals(otherRecord.id); + && externalId.equals(otherRecord.externalId) + && computeName.equals(otherRecord.computeName); } else { return false; } @@ -148,16 +179,27 @@ public boolean equals(Object other) { * @return a {@link ServiceRecord} populated with the values of this {@code AMRecord} */ public ServiceRecord toServiceRecord() { - ServiceRecord serviceRecord = new ServiceRecord(); + if (serviceRecord != null) { + return serviceRecord; + } + serviceRecord = new ServiceRecord(); serviceRecord.set(APP_ID_RECORD_KEY, appId); - serviceRecord.set(HOST_RECORD_KEY, host); + serviceRecord.set(HOST_NAME_RECORD_KEY, hostName); + serviceRecord.set(HOST_IP_RECORD_KEY, hostIp); serviceRecord.set(PORT_RECORD_KEY, port); - serviceRecord.set(OPAQUE_ID_KEY, id); + serviceRecord.set(EXTERNAL_ID_KEY, externalId); + serviceRecord.set(COMPUTE_GROUP_NAME_KEY, computeName); + return serviceRecord; } + @Override + public String toString() { + return toServiceRecord().attributes().toString(); + } + @Override public int hashCode() { - return Objects.hash(appId, host, port, id); + return Objects.hash(appId, hostName, hostIp, externalId, computeName, port); } } diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistry.java b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistry.java new file mode 100644 index 0000000000..ca0a19d5bf --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistry.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.client.registry; + + +import org.apache.hadoop.yarn.api.records.ApplicationId; + + +/** + * Base class for {@code AMRegistry} implementations. + * + *

<p>The specific implementation is configured via the
+ * {@code tez.am.registry.class} property.</p>
+ *
+ * <p>Implementations are expected to provide appropriate service lifecycle
+ * behavior, including:
+ * <ul>
+ *   <li>{@code init}</li>
+ *   <li>{@code serviceStart}</li>
+ *   <li>{@code serviceStop}</li>
+ * </ul>
+ * </p>
+ */
+public interface AMRegistry extends AutoCloseable {
+
+  void add(AMRecord server) throws Exception;
+
+  void remove(AMRecord server) throws Exception;
+
+  ApplicationId generateNewId() throws Exception;
+
+  AMRecord createAmRecord(ApplicationId appId, String hostName, String hostIp, int port,
+                          String computeName);
+
+  void close(); // redeclared without a throws clause, unlike AutoCloseable.close(), so callers need no try/catch
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClient.java b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClient.java
new file mode 100644
index 0000000000..02e320fc1c
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClient.java
@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.client.registry;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Client-side base class for discovering Application Master (AM) instances
+ * registered in the AM registry.
+ *
+ *

<p>Implementations are responsible for locating AM endpoints and returning
+ * their metadata. This API is used by client components to discover running
+ * Tez AMs.</p>
+ *
+ * <p>Listeners may be registered to receive notifications when AM records
+ * are added, updated, or removed.</p>
+ */
+public abstract class AMRegistryClient implements Closeable {
+  private static final Logger LOG = LoggerFactory.getLogger(AMRegistryClient.class);
+
+  private final List<AMRegistryClientListener> listeners = new ArrayList<>();
+
+  /**
+   * Lookup AM metadata for the given application ID.
+   *
+   * @param appId the application ID
+   * @return the AM record if found, otherwise {@code null}
+   * @throws IOException if the lookup fails
+   */
+  public abstract AMRecord getRecord(ApplicationId appId) throws IOException;
+
+  /**
+   * Retrieve all AM records known in the registry.
+   *
+   * @return a list of AM records (possibly empty)
+   * @throws IOException if the fetch fails
+   */
+  public abstract List<AMRecord> getAllRecords() throws IOException;
+
+  /**
+   * Register a listener for AM registry events.
+   * The listener will be notified when AM records are added, updated or removed.
+   *
+   * @param listener the listener to add
+   */
+  public synchronized void addListener(AMRegistryClientListener listener) {
+    listeners.add(listener);
+  }
+
+  /**
+   * Notify listeners of a newly added AM record; a failing listener is logged and does not stop the others.
+   *
+   * @param record the added AM record
+   */
+  protected synchronized void notifyOnAdded(AMRecord record) {
+    for (AMRegistryClientListener listener : new ArrayList<>(listeners)) { // snapshot: a callback may add listeners
+      try {
+        listener.onAdd(record);
+      } catch (Exception e) {
+        LOG.warn("Exception while calling AM add listener, AM record {}", record, e);
+      }
+    }
+  }
+
+  /**
+   * Notify listeners of an updated AM record; a failing listener is logged and does not stop the others.
+   *
+   * @param record the updated AM record
+   */
+  protected synchronized void notifyOnUpdated(AMRecord record) {
+    for (AMRegistryClientListener listener : new ArrayList<>(listeners)) { // snapshot: a callback may add listeners
+      try {
+        listener.onUpdate(record);
+      } catch (Exception e) {
+        LOG.warn("Exception while calling AM update listener, AM record {}", record, e);
+      }
+    }
+  }
+
+  /**
+   * Notify listeners of a removed AM record; a failing listener is logged and does not stop the others.
+   *
+   * @param record the removed AM record
+   */
+  protected synchronized void notifyOnRemoved(AMRecord record) {
+    for (AMRegistryClientListener listener : new ArrayList<>(listeners)) { // snapshot: a callback may add listeners
+      try {
+        listener.onRemove(record);
+      } catch (Exception e) {
+        LOG.warn("Exception while calling AM remove listener, AM record {}", record, e);
+      }
+    }
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClientListener.java b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClientListener.java
new file mode 100644
index 0000000000..789fc22ef2
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryClientListener.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.client.registry;
+
+public interface AMRegistryClientListener {
+
+  void onAdd(AMRecord record); // callback for a newly added AM record (see AMRegistryClient#notifyOnAdded)
+
+  /**
+   * Default implementation of {@code onUpdate} delegates to {@code onAdd}.
+   *
+   * <p>This provides a convenient backward-compatible behavior for consumers that
+   * store {@link AMRecord} instances in collections keyed by something stable
+   * (such as ApplicationId). In such cases, re-adding an {@link AMRecord}
+   * effectively overwrites the previous entry, making an explicit update handler
+   * unnecessary for many implementations.</p>
+   *
+   * @param record the updated {@link AMRecord} instance
+   */
+  default void onUpdate(AMRecord record){
+    onAdd(record);
+  }
+
+  void onRemove(AMRecord record); // callback for a removed AM record (see AMRegistryClient#notifyOnRemoved)
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryUtils.java b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryUtils.java
new file mode 100644
index 0000000000..79b372c683
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/AMRegistryUtils.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.client.registry;
+
+import java.io.IOException;
+
+import org.apache.hadoop.registry.client.binding.RegistryUtils;
+import org.apache.hadoop.registry.client.types.ServiceRecord;
+
+public final class AMRegistryUtils {
+
+  private AMRegistryUtils() {} // static helpers only; not meant to be instantiated
+
+  public static AMRecord jsonStringToRecord(String json) throws IOException { // JSON ServiceRecord -> AMRecord
+    RegistryUtils.ServiceRecordMarshal marshal = new RegistryUtils.ServiceRecordMarshal();
+    ServiceRecord serviceRecord = marshal.fromJson(json);
+    return new AMRecord(serviceRecord);
+  }
+
+  public static String recordToJsonString(AMRecord amRecord) throws IOException { // AMRecord -> JSON ServiceRecord
+    RegistryUtils.ServiceRecordMarshal marshal = new RegistryUtils.ServiceRecordMarshal();
+    return marshal.toJson(amRecord.toServiceRecord());
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkAMRegistryClient.java b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkAMRegistryClient.java
new file mode 100644
index 0000000000..7e189490da
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkAMRegistryClient.java
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.client.registry.zookeeper; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.imps.CuratorFrameworkState; +import org.apache.curator.framework.recipes.cache.ChildData; +import org.apache.curator.framework.recipes.cache.TreeCache; +import org.apache.curator.framework.recipes.cache.TreeCacheEvent; +import org.apache.curator.framework.recipes.cache.TreeCacheListener; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.registry.client.types.ServiceRecord; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.client.registry.AMRegistryClient; +import org.apache.tez.client.registry.AMRegistryUtils; +import org.apache.tez.dag.api.TezConfiguration; + +import com.fasterxml.jackson.core.JsonParseException; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Curator/Zookeeper implementation of {@link AMRegistryClient}. 
+ */
+@InterfaceAudience.Public
+public final class ZkAMRegistryClient extends AMRegistryClient {
+  private static final Logger LOG = LoggerFactory.getLogger(ZkAMRegistryClient.class);
+  private static final Map<String, ZkAMRegistryClient> INSTANCES = new HashMap<>();
+
+  private final Configuration conf;
+  // Cache of known AMs, keyed by application id; filled by the ZooKeeper listener, read by the lookup methods
+  private final ConcurrentHashMap<ApplicationId, AMRecord> amRecordCache = new ConcurrentHashMap<>();
+
+  private CuratorFramework client;
+  private TreeCache cache;
+  private volatile ZkRegistryListener listener;
+
+  private ZkAMRegistryClient(final Configuration conf) {
+    this.conf = conf;
+  }
+
+  public static synchronized ZkAMRegistryClient getClient(final Configuration conf) {
+    String namespace = conf.get(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE);
+    ZkAMRegistryClient registry = INSTANCES.get(namespace);
+    if (registry == null) {
+      registry = new ZkAMRegistryClient(conf);
+      INSTANCES.put(namespace, registry);
+    }
+    LOG.info("Returning tez AM registry ({}) for namespace '{}'", System.identityHashCode(registry), namespace);
+    return registry;
+  }
+
+  /**
+   * Deserializes a {@link ServiceRecord} from ZooKeeper data and converts it into an {@link AMRecord}
+   * for caching.
+   *
+   * @param childData the ZooKeeper node data containing a serialized {@link ServiceRecord}
+   * @return an {@link AMRecord} constructed from the deserialized {@link ServiceRecord}, or {@code null}
+   *         if the node is not an application leaf, carries no data, or does not hold JSON
+   * @throws IOException if the data cannot be deserialized into a {@link ServiceRecord}
+   */
+  public static AMRecord getAMRecord(final ChildData childData) throws IOException {
+    // Not a leaf path. Only leaf path contains AMRecord.
+    if (!childData.getPath().contains(ApplicationId.appIdStrPrefix)) {
+      return null;
+    }
+    byte[] data = childData.getData();
+    // only the path appeared, there is no data yet (a missing payload is treated like an empty one)
+    if (data == null || data.length == 0) {
+      return null;
+    }
+    String value = new String(data, StandardCharsets.UTF_8);
+    try {
+      return AMRegistryUtils.jsonStringToRecord(value);
+    } catch (JsonParseException e) {
+      //Not a json AMRecord (SRV), could be some other data.
+      LOG.warn("Non-json data received while de-serializing AMRecord: {}. Ignoring...", value);
+      return null;
+    }
+  }
+
+  public void start() throws Exception {
+    ZkConfig zkConf = new ZkConfig(this.conf);
+    client = zkConf.createCuratorFramework();
+    cache = new TreeCache(client, zkConf.getZkNamespace());
+    listener = new ZkRegistryListener();
+    cache.getListenable().addListener(listener); // register before start() so no early event is missed
+    client.start();
+    cache.start();
+  }
+
+  @Override
+  public AMRecord getRecord(ApplicationId appId) {
+    AMRecord rec = amRecordCache.get(appId);
+    // Return a copy.
+    return rec == null ? null : new AMRecord(rec);
+  }
+
+  @Override
+  public List<AMRecord> getAllRecords() {
+    return amRecordCache.values().stream().map(AMRecord::new).collect(Collectors.toList());
+  }
+
+  @Override
+  public void close() {
+    IOUtils.closeQuietly(cache);
+    IOUtils.closeQuietly(client);
+  }
+
+  public boolean isInitialized() {
+    return listener != null && listener.initialized; // false (not an NPE) when start() has not been called yet
+  }
+
+  /**
+   * Callback listener for ZooKeeper events that updates the local cache
+   * when child nodes under the monitored path change.
+   */
+  private class ZkRegistryListener implements TreeCacheListener {
+
+    private volatile boolean initialized = false; // set on the Curator event thread, read via isInitialized()
+
+    @Override
+    public void childEvent(final CuratorFramework clientParam, final TreeCacheEvent event) throws Exception {
+      Preconditions.checkArgument(clientParam != null && clientParam.getState() == CuratorFrameworkState.STARTED,
+          "Curator client is not started");
+
+      ChildData childData = event.getData();
+      switch (event.getType()) {
+      case NODE_ADDED:
+        if (isEmpty(childData)) {
+          LOG.info("AppId allocated: {}", childData.getPath());
+        } else {
+          AMRecord amRecord = getAMRecord(childData);
+          if (amRecord != null) {
+            LOG.info("AM registered with data: {}. Notifying listeners.", amRecord);
+            amRecordCache.put(amRecord.getApplicationId(), amRecord);
+            notifyOnAdded(amRecord);
+          }
+        }
+        break;
+      case NODE_UPDATED:
+        if (isEmpty(childData)) {
+          throw new RuntimeException("AM updated with empty data");
+        } else {
+          AMRecord amRecord = getAMRecord(childData);
+          if (amRecord != null) {
+            LOG.info("AM updated data: {}. Notifying listeners.", amRecord);
+            amRecordCache.put(amRecord.getApplicationId(), amRecord);
+            notifyOnUpdated(amRecord);
+          }
+        }
+        break;
+      case NODE_REMOVED:
+        if (isEmpty(childData)) {
+          LOG.info("Unused AppId unregistered: {}", childData.getPath());
+        } else {
+          AMRecord amRecord = getAMRecord(childData);
+          if (amRecord != null) {
+            LOG.info("AM removed: {}. Notifying listeners.", amRecord);
+            amRecordCache.remove(amRecord.getApplicationId(), amRecord);
+            notifyOnRemoved(amRecord);
+          }
+        }
+        break;
+      case INITIALIZED:
+        this.initialized = true; // intentional fall-through: the event is also logged by the default branch
+      default:
+        if (childData == null) {
+          LOG.info("Ignored event {}", event.getType());
+        } else {
+          LOG.info("Ignored event {} for {}", event.getType(), childData.getPath());
+        }
+      }
+    }
+
+    private boolean isEmpty(ChildData childData) {
+      return childData == null || childData.getData() == null || childData.getData().length == 0;
+    }
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkConfig.java b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkConfig.java
new file mode 100644
index 0000000000..af59072532
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkConfig.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.client.registry.zookeeper;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.curator.RetryPolicy;
+import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.CuratorFrameworkFactory;
+import org.apache.curator.retry.ExponentialBackoffRetry;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.tez.dag.api.TezConfiguration;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ZkConfig {
+  private static final Logger LOG = LoggerFactory.getLogger(ZkConfig.class);
+
+  // Example: with namespace 'foo' in the configuration and COMPUTE_GROUP_NAME=bar in the environment (compute groups
+  // enabled), the resulting ZooKeeper path is /tez-external-sessions/foo/bar
+  private final static String ZK_NAMESPACE_PREFIX = "/tez-external-sessions";
+  public final static String COMPUTE_GROUP_NAME_ENV = "COMPUTE_GROUP_NAME";
+  public final static String DEFAULT_COMPUTE_GROUP_NAME = "default-compute";
+
+  private final String zkQuorum;
+  private final String zkNamespace;
+  private final int curatorBackoffSleepMs;
+  private final int curatorMaxRetries;
+  private final int sessionTimeoutMs;
+  private final int connectionTimeoutMs;
+
+  public ZkConfig(Configuration conf) {
+    zkQuorum = conf.get(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM);
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(zkQuorum), "zkQuorum cannot be null or empty");
+
+    String fullZkNamespace = ZK_NAMESPACE_PREFIX;
+
+    String namespace = conf.get(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE,
+        TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE_DEFAULT);
+    Preconditions.checkArgument(!Strings.isNullOrEmpty(namespace), "namespace cannot be null or empty");
+
+    fullZkNamespace = appendNamespace(fullZkNamespace, namespace);
+
+    boolean enableComputeGroups = conf.getBoolean(TezConfiguration.TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS,
+        TezConfiguration.TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS_DEFAULT);
+    if (enableComputeGroups) {
+      final String subNamespace = System.getenv(COMPUTE_GROUP_NAME_ENV);
+      if (subNamespace != null && !subNamespace.isEmpty()) { // unset or empty variable: namespace stays unchanged
+        fullZkNamespace = appendNamespace(fullZkNamespace, subNamespace);
+        LOG.info("Compute groups enabled: subNamespace: {} fullZkNamespace: {}", subNamespace, fullZkNamespace);
+      }
+    } else {
+      LOG.info("Compute groups disabled: fullZkNamespace: {}", fullZkNamespace);
+    }
+    zkNamespace = fullZkNamespace;
+    LOG.info("Using ZK namespace: {}", fullZkNamespace);
+
+    curatorBackoffSleepMs = Math.toIntExact(conf.getTimeDuration(TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP,
+        TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP_DEFAULT, TimeUnit.MILLISECONDS));
+    curatorMaxRetries = conf.getInt(TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES,
+        TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES_DEFAULT);
+    sessionTimeoutMs = Math.toIntExact(conf.getTimeDuration(TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT,
+        TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS));
+    connectionTimeoutMs = Math.toIntExact(conf.getTimeDuration(TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT,
+        TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS));
+  }
+
+  public String getZkQuorum() {
+    return zkQuorum;
+  }
+
+  public String getZkNamespace() {
+    return zkNamespace;
+  }
+
+  public int getCuratorBackoffSleepMs() {
+    return curatorBackoffSleepMs;
+  }
+
+  public int getCuratorMaxRetries() {
+    return curatorMaxRetries;
+  }
+
+  public int getSessionTimeoutMs() {
+    return sessionTimeoutMs;
+  }
+
+  public int getConnectionTimeoutMs() {
+    return connectionTimeoutMs;
+  }
+
+  public RetryPolicy getRetryPolicy() {
+    return new ExponentialBackoffRetry(getCuratorBackoffSleepMs(), getCuratorMaxRetries());
+  }
+
+  public CuratorFramework createCuratorFramework() {
+    return CuratorFrameworkFactory.newClient(
+        getZkQuorum(),
+        getSessionTimeoutMs(),
+        getConnectionTimeoutMs(),
+        getRetryPolicy()
+    );
+  }
+
+  /**
+   * Appends a namespace to the given prefix, inserting a path separator between
+   * them if necessary.
+   *
+   * @param prefix    the initial path prefix to which the namespace is appended; must not be null
+   * @param namespace the namespace segment to append; must not be null
+   * @return the concatenation of {@code prefix} and {@code namespace} with a separator inserted if needed
+   */
+  private String appendNamespace(String prefix, String namespace) {
+    boolean hasSlash = namespace.startsWith(Path.SEPARATOR);
+    return prefix + (hasSlash ? namespace : Path.SEPARATOR + namespace);
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkFrameworkClient.java b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkFrameworkClient.java
new file mode 100644
index 0000000000..ef9f948039
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/ZkFrameworkClient.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.client.registry.zookeeper;
+
+import java.io.IOException;
+
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
+import org.apache.hadoop.yarn.client.api.YarnClientApplication;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.util.Records;
+import org.apache.tez.client.FrameworkClient;
+import org.apache.tez.client.registry.AMRecord;
+import org.apache.tez.dag.api.TezConfiguration;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * A {@link FrameworkClient} that looks up Application Masters through a
+ * {@link ZkAMRegistryClient} instead of asking a YARN ResourceManager.
+ */
+public class ZkFrameworkClient extends FrameworkClient {
+
+  // Record returned by the most recent registry lookup; null until one succeeds.
+  private AMRecord amRecord;
+  // Created lazily by init(); stays null until then.
+  private ZkAMRegistryClient amRegistryClient = null;
+  private volatile boolean isRunning = false;
+  private String amHost;
+  private int amPort;
+
+  /**
+   * Obtains the registry client for the given configuration. Calling this again once a
+   * client has been obtained is a no-op.
+   *
+   * @param tezConf configuration handed to {@code ZkAMRegistryClient.getClient}
+   * @throws RuntimeException wrapping any failure to obtain the registry client
+   */
+  @Override
+  public synchronized void init(TezConfiguration tezConf) {
+    if (amRegistryClient == null) {
+      try {
+        amRegistryClient = ZkAMRegistryClient.getClient(tezConf);
+      } catch (Exception e) {
+        throw new RuntimeException("Failed to create the ZooKeeper AM registry client", e);
+      }
+    }
+  }
+
+  /**
+   * Starts the registry client and marks this framework client as running.
+   *
+   * @throws IllegalStateException if {@code init} has not been called yet
+   * @throws RuntimeException wrapping any failure to start the registry client
+   */
+  @Override
+  public void start() {
+    // Fail with a clear message instead of a NullPointerException wrapped in a RuntimeException.
+    if (amRegistryClient == null) {
+      throw new IllegalStateException("init() must be called before start()");
+    }
+    try {
+      amRegistryClient.start();
+      isRunning = true;
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to start the ZooKeeper AM registry client", e);
+    }
+  }
+
+  /** Marks this client as no longer running and closes the registry client. */
+  @Override
+  public void stop() {
+    isRunning = false;
+    close();
+  }
+
+  /** Closes the registry client if one was created; does nothing otherwise. */
+  @Override
+  public void close() {
+    if (amRegistryClient != null) {
+      amRegistryClient.close();
+    }
+  }
+
+  /**
+   * Creates a dummy {@link YarnClientApplication} using a pre-existing {@link ApplicationId}
+   * rather than requesting a new one from the ResourceManager.
+   *
+   *

Note: This is a dummy, backward-compatibility implementation. + * Instead of allocating a fresh application ID from the ResourceManager, this method + * reuses the {@code applicationId} already obtained via {@code getApplicationReport()}. + * This allows legacy code paths to continue operating without requiring actual + * creation of a new application.

+ * + *

Hidden assumption here: this method assumes that + * {@code getApplicationReport()} has already been called before + * {@code createApplication()}, ensuring that {@code amRecord.getApplicationId()} + * is always available. This assumption holds in all supported usage patterns: + * the only code path where {@code createApplication()} might be called first is + * {@code TezClient.submitDAGApplication()}, but that path is never exercised in + * Zookeeper standalone mode because that mode assumes applications are already + * running. Therefore, the ordering guarantee is valid in practice.

+ * + *

+ * The method constructs a minimal {@link ApplicationSubmissionContext} and a + * synthetic {@link GetNewApplicationResponse}, both populated with the already + * known application ID. These objects are then wrapped into a + * {@link YarnClientApplication} instance and returned. + *

+   *
+   * @return a {@link YarnClientApplication} backed by a submission context and
+   *         a mocked {@link GetNewApplicationResponse}, both tied to the pre-existing
+   *         application ID.
+   * @throws IllegalStateException if no AM record is available, i.e. {@code getApplicationReport()}
+   *         has not been called yet or its last lookup found no record
+   */
+  @Override
+  public YarnClientApplication createApplication() {
+    // amRecord is only ever assigned by getApplicationReport(); report a broken call order explicitly
+    // instead of failing with a bare NullPointerException.
+    AMRecord record = amRecord;
+    if (record == null) {
+      throw new IllegalStateException(
+          "No AM record available: getApplicationReport() must find the AM in zookeeper before createApplication()");
+    }
+    ApplicationSubmissionContext context = Records.newRecord(ApplicationSubmissionContext.class);
+    ApplicationId appId = record.getApplicationId();
+    context.setApplicationId(appId);
+    GetNewApplicationResponse response = Records.newRecord(GetNewApplicationResponse.class);
+    response.setApplicationId(appId);
+    return new YarnClientApplication(response, context);
+  }
+
+  /**
+   * No-op in this client: nothing is submitted.
+   *
+   * @param appSubmissionContext ignored
+   * @return always {@code null}
+   */
+  @Override
+  public ApplicationId submitApplication(ApplicationSubmissionContext appSubmissionContext) {
+    return null;
+  }
+
+  /**
+   * Only closes the registry client held by this instance; {@code appId} is not used.
+   */
+  @Override
+  public void killApplication(ApplicationId appId) throws YarnException, IOException {
+    close();
+  }
+
+  /**
+   * Builds a synthetic {@link ApplicationReport} from the registry record of {@code appId}.
+   * When a record exists the report is RUNNING and carries the record's host and port, which are
+   * also cached for {@link #getAmHost()} / {@link #getAmPort()}. When no record exists the report
+   * is FINISHED/FAILED with a diagnostic message, and the cached host and port are left unchanged.
+   *
+   * @param appId the application to look up in the registry
+   * @return a freshly created report for {@code appId}
+   */
+  @Override
+  public ApplicationReport getApplicationReport(ApplicationId appId) throws YarnException, IOException {
+    ApplicationReport report = Records.newRecord(ApplicationReport.class);
+    report.setApplicationId(appId);
+    report.setTrackingUrl("");
+    amRecord = amRegistryClient.getRecord(appId);
+    // this could happen if the AM died, the AM record store under path will not exist
+    if (amRecord == null) {
+      report.setYarnApplicationState(YarnApplicationState.FINISHED);
+      report.setFinalApplicationStatus(FinalApplicationStatus.FAILED);
+      report.setDiagnostics("AM record not found (likely died) in zookeeper for application id: " + appId);
+    } else {
+      report.setHost(amRecord.getHostName());
+      amHost = amRecord.getHostName();
+      amPort = amRecord.getPort();
+      report.setRpcPort(amRecord.getPort());
+      report.setYarnApplicationState(YarnApplicationState.RUNNING);
+    }
+    return report;
+  }
+
+  /** @return {@code true} between a successful {@code start()} and the next {@code stop()} */
+  @Override
+  public boolean isRunning() {
+    return isRunning;
+  }
+
+  /** @return the AM host cached by the last lookup that found a record, or {@code null} if none did */
+  @Override
+  public String getAmHost() {
+    return amHost;
+  }
+
+  /** @return the AM port cached by the last lookup that found a record, or {@code 0} if none did */
+  @Override
+  public int getAmPort() {
+    return amPort;
+  }
+
+  /** @return whether the registry client exists and reports itself initialized */
+  @VisibleForTesting
+  boolean isZkInitialized() {
+    // Null-safe so that the check can also be made before init().
+    return amRegistryClient != null && amRegistryClient.isInitialized();
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/package-info.java b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/package-info.java
new file mode 100644
index 0000000000..ea4412da84
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/client/registry/zookeeper/package-info.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Public
+@Evolving
+package org.apache.tez.client.registry.zookeeper;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
index 2176141d06..6fc61dddbe 100644
--- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
+++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java
@@ -866,6 +866,12 @@ public TezConfiguration(boolean loadDefaults) {
   public static final boolean TEZ_AM_NODE_UNHEALTHY_RESCHEDULE_TASKS_DEFAULT = false;
+  /** Int value.
Port used for AM RPC*/ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty(type="integer") + public static final String TEZ_AM_RPC_PORT = TEZ_AM_PREFIX + "rpc.port"; + public static final int TEZ_AM_RPC_PORT_DEFAULT = 0; + /** Int value. Number of threads to handle client RPC requests. Expert level setting.*/ @ConfigurationScope(Scope.AM) @ConfigurationProperty(type="integer") @@ -2344,16 +2350,74 @@ static Set getPropertySet() { public static final String TEZ_TASK_ATTEMPT_HOOKS = TEZ_TASK_PREFIX + "attempt.hooks"; /** - * Comma-separated list of additional hadoop config files to load from CLASSPATH in standalone mode. + * String value. ZooKeeper quorum connection string used when creating a CuratorFramework for the ZooKeeper registry. */ @ConfigurationScope(Scope.AM) @ConfigurationProperty - public static final String TEZ_AM_STANDALONE_CONFS = TEZ_AM_PREFIX + "standalone.confs"; + public static final String TEZ_AM_ZOOKEEPER_QUORUM = TEZ_AM_PREFIX + "zookeeper.quorum"; /** - * String value. The class to be used for the AM registry. - */ + * String value. Namespace in ZooKeeper registry for the Application Master. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_REGISTRY_NAMESPACE = TEZ_AM_PREFIX + "registry.namespace"; + public static final String TEZ_AM_REGISTRY_NAMESPACE_DEFAULT = "/tez_am/server"; + + /** + * Boolean value. Whether to enable compute groups, see further details in ZkConfig. + */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS = TEZ_AM_PREFIX + "registry.enable.compute.groups"; + public static final boolean TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS_DEFAULT = false; + + + /** + * Initial backoff sleep duration for Curator retries. Supports TimeUnits. + * Default unit is milliseconds. It's used when creating a CuratorFramework for the ZooKeeper registry. 
+   */
+  @ConfigurationScope(Scope.AM)
+  @ConfigurationProperty
+  public static final String TEZ_AM_CURATOR_BACKOFF_SLEEP = TEZ_AM_PREFIX + "curator.backoff.sleep";
+  public static final String TEZ_AM_CURATOR_BACKOFF_SLEEP_DEFAULT = "1000ms";
+
+  /**
+   * Integer value. Maximum number of retries for Curator operations.
+   * It's used when creating a CuratorFramework for the ZooKeeper registry.
+   */
+  @ConfigurationScope(Scope.AM)
+  @ConfigurationProperty
+  public static final String TEZ_AM_CURATOR_MAX_RETRIES = TEZ_AM_PREFIX + "curator.max.retries";
+  public static final int TEZ_AM_CURATOR_MAX_RETRIES_DEFAULT = 3;
+
+  /**
+   * Session timeout for Curator framework. Supports TimeUnits.
+   * Default unit is milliseconds. It's used when creating a CuratorFramework for the ZooKeeper registry.
+   */
+  @ConfigurationScope(Scope.AM)
+  @ConfigurationProperty
+  public static final String TEZ_AM_CURATOR_SESSION_TIMEOUT = TEZ_AM_PREFIX + "curator.session.timeout";
+  public static final String TEZ_AM_CURATOR_SESSION_TIMEOUT_DEFAULT = "150000ms";
+
+  /**
+   * Connection timeout for Curator framework. Supports TimeUnits.
+   * Default unit is milliseconds. It's used when creating a CuratorFramework for the ZooKeeper registry.
+   */
   @ConfigurationScope(Scope.AM)
   @ConfigurationProperty
-  public static final String TEZ_AM_REGISTRY_CLASS = TEZ_AM_PREFIX + "registry.class";
+  public static final String TEZ_AM_CURATOR_CONNECTION_TIMEOUT = TEZ_AM_PREFIX + "curator.connection.timeout";
+  public static final String TEZ_AM_CURATOR_CONNECTION_TIMEOUT_DEFAULT = "15000ms";
+
+  /** String value. Name of the FrameworkMode (e.g. YARN, STANDALONE_ZOOKEEPER) resolved by FrameworkUtils. */
+  @ConfigurationScope(Scope.AM)
+  @ConfigurationProperty
+  public static final String TEZ_FRAMEWORK_MODE = TEZ_PREFIX + "framework.mode";
+
+  /**
+   * List of additional hadoop config files to load from CLASSPATH in STANDALONE_ZOOKEEPER framework mode.
+ */ + @ConfigurationScope(Scope.AM) + @ConfigurationProperty + public static final String TEZ_AM_STANDALONE_CONFS = TEZ_AM_PREFIX + "standalone.confs"; } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java index 71aff74801..2f2ccd5ec1 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConstants.java @@ -29,25 +29,21 @@ public final class TezConstants { public static final String TEZ_APPLICATION_MASTER_CLASS = "org.apache.tez.dag.app.DAGAppMaster"; - + /** * Command-line argument to be set when running the Tez AM in session mode. */ public static final String TEZ_SESSION_MODE_CLI_OPTION = "session"; public static final String TEZ_TAR_LR_NAME = "tezlib"; - + /* * Tez AM Service Authorization * These are the same as MR which allows Tez to run in secure * mode without configuring service ACLs */ - public static final String - TEZ_AM_SECURITY_SERVICE_AUTHORIZATION_TASK_UMBILICAL = - "security.job.task.protocol.acl"; - public static final String - TEZ_AM_SECURITY_SERVICE_AUTHORIZATION_CLIENT = - "security.job.client.protocol.acl"; + public static final String TEZ_AM_SECURITY_SERVICE_AUTHORIZATION_TASK_UMBILICAL = "security.job.task.protocol.acl"; + public static final String TEZ_AM_SECURITY_SERVICE_AUTHORIZATION_CLIENT = "security.job.client.protocol.acl"; public static final String SERVICE_PLUGINS_DESCRIPTOR_JSON = "service_plugins_descriptor.json"; public static final String TEZ_PB_BINARY_CONF_NAME = "tez-conf.pb"; @@ -67,8 +63,8 @@ public final class TezConstants { public static final String TEZ_CONTAINER_LOG_PARAMS_SEPARATOR = ";"; public static final String TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME = - TezConfiguration.TEZ_SESSION_PREFIX + "local-resources.pb"; - + TezConfiguration.TEZ_SESSION_PREFIX + "local-resources.pb"; + public static final String TEZ_APPLICATION_TYPE = "TEZ"; /** * The service id for 
the NodeManager plugin used to share intermediate data @@ -84,7 +80,7 @@ public final class TezConstants { // Configuration keys used internally and not set by the users - + // These are session specific DAG ACL's. Currently here because these can only be specified // via code in the API. /** @@ -107,6 +103,8 @@ public final class TezConstants { private static final String TEZ_AM_SERVICE_PLUGIN_NAME_YARN_CONTAINERS = "TezYarn"; private static final String TEZ_AM_SERVICE_PLUGIN_NAME_IN_AM = "TezUber"; + public static final String TEZ_AM_EXTERNAL_ID = "TEZ_AM_EXTERNAL_ID"; + public static final String TEZ_FRAMEWORK_MODE = "TEZ_FRAMEWORK_MODE"; public static String getTezYarnServicePluginName() { return TEZ_AM_SERVICE_PLUGIN_NAME_YARN_CONTAINERS; diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/ClientFrameworkService.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/ClientFrameworkService.java new file mode 100644 index 0000000000..1ee70e8344 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/ClientFrameworkService.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins; + + +import org.apache.tez.client.FrameworkClient; + +/** + * A {@code FrameworkService} that runs within the client process using {@code TezClient}. + * + *

This service bundles together a compatible {@code FrameworkClient} and + * {@code AMRegistryClient} to enable communication and coordination with the + * Application Master.

+ * + *

Implementations must provide a {@link FrameworkClient} instance that will + * be used by the Tez client layer.

+ */ +public interface ClientFrameworkService extends FrameworkService { + + /** + * Create a new {@link FrameworkClient} instance used by the client-side + * Tez runtime. + * + * @return a new {@code FrameworkClient} instance + */ + FrameworkClient newFrameworkClient(); +} diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkMode.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkMode.java new file mode 100644 index 0000000000..32b82b05d4 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkMode.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.frameworkplugins;
+
+/**
+ * A FrameworkMode is a pair of classes implementing (ClientFrameworkService, ServerFrameworkService).
+ * Clients using one FrameworkMode should only connect to AMs using the same FrameworkMode.
+ * It is the responsibility of the user to set up their environment/configs to ensure matching modes,
+ * e.g. a client using a mode that requires a Zookeeper-based registry should not be configured
+ * to interact with AMs that do not keep a Zookeeper-based registry.
+ * Each constant stores the two implementations as fully qualified class names.
+ */
+public enum FrameworkMode {
+
+  STANDALONE_ZOOKEEPER(
+      "org.apache.tez.frameworkplugins.zookeeper.ZkStandaloneClientFrameworkService",
+      "org.apache.tez.frameworkplugins.zookeeper.ZkStandaloneServerFrameworkService"),
+
+  YARN(
+      "org.apache.tez.frameworkplugins.yarn.YarnClientFrameworkService",
+      "org.apache.tez.frameworkplugins.yarn.YarnServerFrameworkService");
+
+  private final String clientClassName;
+  private final String serverClassName;
+
+  FrameworkMode(String clientClass, String serverClass) {
+    clientClassName = clientClass;
+    serverClassName = serverClass;
+  }
+
+  /** @return the fully qualified name of the client-side service class of this mode */
+  public String getClientClassName() {
+    return clientClassName;
+  }
+
+  /** @return the fully qualified name of the server-side service class of this mode */
+  public String getServerClassName() {
+    return serverClassName;
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkService.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkService.java
new file mode 100644
index 0000000000..f27daf6577
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkService.java
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins; + +/** + * Marker interface for framework-level services in Tez. + *

+ * This interface is extended by ClientFrameworkService and ServerFrameworkService + * to represent client-side and server-side framework service implementations, respectively. + *

+ */ +public interface FrameworkService { +} diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkUtils.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkUtils.java new file mode 100644 index 0000000000..9011906fe4 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/FrameworkUtils.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins; + + +import javax.annotation.Nullable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.common.ReflectionUtils; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.TezReflectionException; + +public final class FrameworkUtils { + + private static final String SERVER_FRAMEWORK_SERVICE_INTERFACE_NAME = + "org.apache.tez.frameworkplugins.ServerFrameworkService"; + + private FrameworkUtils() {} + + /* + Searches for a FrameworkService provider which implements a target interface. + The interface should be either ClientFrameworkService or ServerFrameworkService. 
+    Depending on which interface is used, either the client or server class of a
+    matching FrameworkMode will be used as the implementation.
+
+    NOTE: Layering of FrameworkServices in a decorator-style is currently not supported
+
+    An implementation is searched in the following order:
+    1. If conf is not null and the parameter TEZ_FRAMEWORK_MODE is set to a non-blank value:
+       the value of TEZ_FRAMEWORK_MODE from the conf will be used
+    2. If conf is null or the parameter TEZ_FRAMEWORK_MODE is not set
+       and the environment var TEZ_FRAMEWORK_MODE is not empty:
+       the value of the environment var will be used
+    3. Otherwise: the default class will be instantiated and returned
+
+    Mode values are trimmed and matched case-insensitively against the FrameworkMode constants.
+    Reflection failures are rethrown as RuntimeException; an unknown mode or an unsupported
+    interface is reported as IllegalArgumentException.
+   */
+  public static <T extends FrameworkService> T get(Class<T> interfaze, @Nullable Configuration conf,
+      Class<?> defaultClazz) {
+    String modeInConf = conf != null ? conf.get(TezConfiguration.TEZ_FRAMEWORK_MODE) : null;
+    String modeInEnv = System.getenv(TezConstants.TEZ_FRAMEWORK_MODE);
+    try {
+      if (isSet(modeInConf)) {
+        return getByMode(interfaze, modeInConf);
+      } else if (isSet(modeInEnv)) {
+        return getByMode(interfaze, modeInEnv);
+      } else if (defaultClazz != null) {
+        // Class.newInstance() is deprecated; go through the no-arg constructor explicitly.
+        return interfaze.cast(defaultClazz.getDeclaredConstructor().newInstance());
+      } else {
+        throw new RuntimeException(
+            "Framework service not found in any mode: configuration, environment, or default class");
+      }
+    } catch (TezReflectionException | ReflectiveOperationException e) {
+      throw new RuntimeException("Failed to load framework service for interface: " + interfaze.getName(), e);
+    }
+  }
+
+  /* A mode counts as set only when it contains something other than whitespace. */
+  private static boolean isSet(String mode) {
+    return mode != null && !mode.trim().isEmpty();
+  }
+
+  /* Instantiates the client or server class that the named FrameworkMode declares for the interface. */
+  private static <T extends FrameworkService> T getByMode(Class<T> interfaze, String mode)
+      throws TezReflectionException {
+    final FrameworkMode frameworkMode;
+    try {
+      // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
+      frameworkMode = FrameworkMode.valueOf(mode.trim().toUpperCase(java.util.Locale.ROOT));
+    } catch (IllegalArgumentException e) {
+      throw new IllegalArgumentException("Unknown framework mode '" + mode + "', expected one of "
+          + java.util.Arrays.toString(FrameworkMode.values()), e);
+    }
+    String clazz;
+    if (ClientFrameworkService.class == interfaze) {
+      clazz = frameworkMode.getClientClassName();
+    } else if (SERVER_FRAMEWORK_SERVICE_INTERFACE_NAME.equals(interfaze.getCanonicalName())) {
+      clazz = frameworkMode.getServerClassName();
+    } else {
+      throw new IllegalArgumentException("Unsupported FrameworkService: " + interfaze.getName());
+    }
+    return ReflectionUtils.createClazzInstance(clazz);
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/package-info.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/package-info.java
new file mode 100644
index 0000000000..d279462b1e
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/package-info.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Public
+@Evolving
+package org.apache.tez.frameworkplugins;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Evolving;
diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnClientFrameworkService.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnClientFrameworkService.java
new file mode 100644
index 0000000000..224f7ff256
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnClientFrameworkService.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins.yarn; + +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.tez.client.FrameworkClient; +import org.apache.tez.client.TezYarnClient; +import org.apache.tez.frameworkplugins.ClientFrameworkService; + +/** + * YARN-based client framework service implementation. + * Provides default YARN framework client functionality. + */ +public class YarnClientFrameworkService implements ClientFrameworkService { + + @Override + public FrameworkClient newFrameworkClient() { + return new TezYarnClient(YarnClient.createYarnClient()); + } +} diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java new file mode 100644 index 0000000000..ffc5b78fb9 --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Public +@Evolving +package org.apache.tez.frameworkplugins.yarn; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneClientFrameworkService.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneClientFrameworkService.java new file mode 100644 index 0000000000..43b11140cc --- /dev/null +++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneClientFrameworkService.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tez.frameworkplugins.zookeeper;
+
+
+import org.apache.tez.client.FrameworkClient;
+import org.apache.tez.client.registry.zookeeper.ZkFrameworkClient;
+import org.apache.tez.frameworkplugins.ClientFrameworkService;
+
+/**
+ * Client framework service of the standalone ZooKeeper mode.
+ * Hands out {@link ZkFrameworkClient} instances.
+ */
+public class ZkStandaloneClientFrameworkService implements ClientFrameworkService {
+
+  /**
+   * @return a new, not yet initialized {@link ZkFrameworkClient}
+   */
+  @Override
+  public FrameworkClient newFrameworkClient() {
+    return new ZkFrameworkClient();
+  }
+}
diff --git a/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java b/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java
new file mode 100644
index 0000000000..3acc21feae
--- /dev/null
+++ b/tez-api/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +@Public +@Evolving +package org.apache.tez.frameworkplugins.zookeeper; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java index 46a972c1d6..9d004e8303 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClient.java @@ -221,6 +221,16 @@ public void testTezClientSession() throws Exception { testTezClient(true, true, "testTezClientSession"); } + @Test (timeout = 5000) + public void testTezClientReconnect() throws Exception { + testTezClientReconnect(true); + } + + @Test (timeout = 5000, expected = IllegalStateException.class) + public void testTezClientReconnectNoSession() throws Exception { + testTezClientReconnect(false); + } + @Test (timeout = 5000) public void testTezClientSessionLargeDAGPlan() throws Exception { // request size is within threshold of being serialized @@ -387,18 +397,18 @@ public TezClientForTest testTezClient(boolean isSession, boolean shouldStop, Str assertTrue(context.getAMContainerSpec().getLocalResources().containsKey( lrName1)); } - + // add resources String lrName2 = "LR2"; lrs.clear(); lrs.put(lrName2, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test2"), LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1)); client.addAppMasterLocalFiles(lrs); - + ApplicationId appId2 = ApplicationId.newInstance(0, 2); when(client.mockYarnClient.createApplication().getNewApplicationResponse().getApplicationId()) .thenReturn(appId2); - + when(client.mockYarnClient.getApplicationReport(appId2).getYarnApplicationState()) .thenReturn(YarnApplicationState.RUNNING); dag = DAG.create("DAG-2-" + dagName).addVertex( @@ -447,6 +457,97 @@ public TezClientForTest testTezClient(boolean isSession, boolean shouldStop, Str 
return client; } + public void testTezClientReconnect(boolean isSession) throws Exception { + //Setup 1 + Map<String, LocalResource> lrs = Maps.newHashMap(); + String lrName1 = "LR1"; + lrs.put(lrName1, LocalResource.newInstance(URL.newInstance("file", "localhost", 0, "/test"), + LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1)); + + //Client 1 + TezClientForTest client = configureAndCreateTezClient(lrs, isSession, null); + + //Submission Context 1 + ArgumentCaptor<ApplicationSubmissionContext> captor = ArgumentCaptor.forClass(ApplicationSubmissionContext.class); + when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState()) + .thenReturn(YarnApplicationState.RUNNING); + + //Client 1 start + client.start(); + + //Client 1 verify + verify(client.mockYarnClient, times(1)).init((Configuration)any()); + verify(client.mockYarnClient, times(1)).start(); + + if (isSession) { + verify(client.mockYarnClient, times(1)).submitApplication(captor.capture()); + ApplicationSubmissionContext context = captor.getValue(); + Assert.assertEquals(3, context.getAMContainerSpec().getLocalResources().size()); + assertTrue(context.getAMContainerSpec().getLocalResources().containsKey( + TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)); + assertTrue(context.getAMContainerSpec().getLocalResources().containsKey( + TezConstants.TEZ_PB_BINARY_CONF_NAME)); + assertTrue(context.getAMContainerSpec().getLocalResources().containsKey( + lrName1)); + } else { + verify(client.mockYarnClient, times(0)).submitApplication(captor.capture()); + } + + //DAG 1 resources + Map<String, LocalResource> lrDAG = Collections.singletonMap(lrName1, LocalResource + .newInstance(URL.newInstance("file", "localhost", 0, "/test1"), LocalResourceType.FILE, + LocalResourceVisibility.PUBLIC, 1, 1)); + + //DAG 1 setup + Vertex vertex = Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, + Resource.newInstance(1, 1)); + DAG dag = DAG.create("DAG").addVertex(vertex).addTaskLocalFiles(lrDAG); + + //DAG 1 submit + DAGClient dagClient = 
client.submitDAG(dag); + + //DAG 1 assertions + assertTrue(dagClient.getExecutionContext().contains(client.mockAppId.toString())); + assertEquals(dagClient.getSessionIdentifierString(), client.mockAppId.toString()); + + //Client 2 reuse appId + ApplicationId appId = client.getAppMasterApplicationId(); + + //Client 2 reuse lrs + TezClientForTest client2 = configureAndCreateTezClient(lrs, isSession, null); + + //Submission Context 2 + ArgumentCaptor<ApplicationSubmissionContext> captorClient2 = + ArgumentCaptor.forClass(ApplicationSubmissionContext.class); + when(client2.mockYarnClient.getApplicationReport(client2.mockAppId).getYarnApplicationState()) + .thenReturn(YarnApplicationState.RUNNING); + + //Client 2 reconnect + client2.getClient(appId); + assertEquals(client2.mockAppId, appId); + + //Client 2 verify + verify(client2.mockYarnClient, times(1)).init((Configuration)any()); + verify(client2.mockYarnClient, times(1)).start(); + //New AM should not be submitted + verify(client2.mockYarnClient, times(0)).submitApplication(captorClient2.capture()); + + //DAG 2 setup + Vertex vertex2 = Vertex.create("Vertex2", ProcessorDescriptor.create("P"), 1, + Resource.newInstance(1, 1)); + dag = DAG.create("DAG2").addVertex(vertex2).addTaskLocalFiles(lrDAG); + + dagClient.close(); + //DAG 2 submit + dagClient = client2.submitDAG(dag); + + //DAG 2 assertions + assertTrue(dagClient.getExecutionContext().contains(appId.toString())); + assertEquals(dagClient.getSessionIdentifierString(), appId.toString()); + + dagClient.close(); + } + @Test (timeout=5000) public void testPreWarm() throws Exception { TezClientForTest client = configureAndCreateTezClient(); diff --git a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java index 03818060c6..56d4192df2 100644 --- a/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java +++ b/tez-api/src/test/java/org/apache/tez/client/TestTezClientUtils.java @@ -697,6 +697,28 @@ public 
void testDefaultLoggingJavaOptsWithRootLogger() { Assert.assertTrue(javaOpts.contains("-DtestProperty=value")); } + @Test (timeout = 5000) + public void testConfYarnZkWorkaround() { + Configuration conf = new Configuration(false); + String val = "localhost:2181"; + conf.set("yarn.resourcemanager.zk-address", val); + + Map<String, String> expected = new HashMap<>(); + expected.put("yarn.resourcemanager.zk-address", val); + + ConfigurationProto confProto = TezClientUtils.createFinalConfProtoForApp(conf, null); + + for (PlanKeyValuePair kvPair : confProto.getConfKeyValuesList()) { + if (expected.containsKey(kvPair.getKey())) { // fix for polluting keys + String v = expected.remove(kvPair.getKey()); + // this way the test still validates that the original + // key/value pairs can be found in the proto's conf + assertEquals("Unexpected value for key: " + kvPair.getKey(), v, kvPair.getValue()); + } + } + assertTrue("Expected keys not found in conf: " + expected.keySet(), expected.isEmpty()); + } + @Test (timeout = 5000) public void testConfSerializationForAm() { Configuration conf =new Configuration(false); diff --git a/tez-api/src/test/java/org/apache/tez/client/registry/TestAMRecord.java b/tez-api/src/test/java/org/apache/tez/client/registry/TestAMRecord.java new file mode 100644 index 0000000000..b51b8246e2 --- /dev/null +++ b/tez-api/src/test/java/org/apache/tez/client/registry/TestAMRecord.java @@ -0,0 +1,259 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.client.registry; + +import static org.junit.Assert.*; + +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hadoop.registry.client.types.ServiceRecord; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.registry.zookeeper.ZkConfig; + +import org.junit.Test; + +public class TestAMRecord { + + @Test + public void testConstructorWithAllParameters() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + + assertEquals(appId, record.getApplicationId()); + assertEquals(hostName, record.getHostName()); + assertEquals(hostName, record.getHostName()); + assertEquals(hostIp, record.getHostIp()); + assertEquals(port, record.getPort()); + assertEquals(externalId, record.getExternalId()); + assertEquals(computeName, record.getComputeName()); + } + + @Test + public void testConstructorWithNullExternalIdAndComputeName() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, null, null); + + assertEquals("", record.getExternalId()); + assertEquals(ZkConfig.DEFAULT_COMPUTE_GROUP_NAME, record.getComputeName()); + } + + @Test + public void testCopyConstructor() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String 
hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord original = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + AMRecord copy = new AMRecord(original); + + assertEquals(original.getApplicationId(), copy.getApplicationId()); + assertEquals(original.getHostName(), copy.getHostName()); + assertEquals(original.getHostIp(), copy.getHostIp()); + assertEquals(original.getPort(), copy.getPort()); + assertEquals(original.getExternalId(), copy.getExternalId()); + assertEquals(original.getComputeName(), copy.getComputeName()); + assertEquals(original, copy); + assertEquals(original.hashCode(), copy.hashCode()); + } + + @Test + public void testConstructorFromServiceRecord() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord original = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + ServiceRecord serviceRecord = original.toServiceRecord(); + AMRecord reconstructed = new AMRecord(serviceRecord); + + assertEquals(original.getApplicationId(), reconstructed.getApplicationId()); + assertEquals(original.getHostName(), reconstructed.getHostName()); + assertEquals(original.getHostIp(), reconstructed.getHostIp()); + assertEquals(original.getPort(), reconstructed.getPort()); + assertEquals(original.getExternalId(), reconstructed.getExternalId()); + assertEquals(original.getComputeName(), reconstructed.getComputeName()); + assertEquals(original, reconstructed); + } + + @Test + public void testConstructorFromServiceRecordWithNullDefaults() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + + // Create record 
with null externalId and computeName + AMRecord original = new AMRecord(appId, hostName, hostIp, port, null, null); + + // Convert to ServiceRecord and back + ServiceRecord serviceRecord = original.toServiceRecord(); + AMRecord reconstructed = new AMRecord(serviceRecord); + + // Verify defaults are preserved + assertEquals("", reconstructed.getExternalId()); + assertEquals(ZkConfig.DEFAULT_COMPUTE_GROUP_NAME, reconstructed.getComputeName()); + assertEquals(original, reconstructed); + } + + @Test + public void testToServiceRecord() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + ServiceRecord serviceRecord = record.toServiceRecord(); + + assertNotNull(serviceRecord); + assertEquals(appId.toString(), serviceRecord.get("appId")); + assertEquals(hostName, serviceRecord.get("hostName")); + assertEquals(hostIp, serviceRecord.get("hostIp")); + assertEquals(String.valueOf(port), serviceRecord.get("port")); + assertEquals(externalId, serviceRecord.get("externalId")); + assertEquals(computeName, serviceRecord.get("computeName")); + } + + @Test + public void testToServiceRecordCaching() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, "external-123", "test-compute"); + ServiceRecord serviceRecord1 = record.toServiceRecord(); + ServiceRecord serviceRecord2 = record.toServiceRecord(); + + // Should return the same cached instance + assertSame(serviceRecord1, serviceRecord2); + } + + @Test + public void testEquals() { + ApplicationId appId1 = ApplicationId.newInstance(12345L, 1); + ApplicationId appId2 = 
ApplicationId.newInstance(12345L, 1); + ApplicationId appId3 = ApplicationId.newInstance(12345L, 2); + + AMRecord record1 = new AMRecord(appId1, "host1", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record2 = new AMRecord(appId2, "host1", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record3 = new AMRecord(appId3, "host1", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record4 = new AMRecord(appId1, "host2", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record5 = new AMRecord(appId1, "host1", "192.168.1.2", 8080, "ext1", "compute1"); + AMRecord record6 = new AMRecord(appId1, "host1", "192.168.1.1", 8081, "ext1", "compute1"); + AMRecord record7 = new AMRecord(appId1, "host1", "192.168.1.1", 8080, "ext2", "compute1"); + AMRecord record8 = new AMRecord(appId1, "host1", "192.168.1.1", 8080, "ext1", "compute2"); + + // Same values should be equal + assertEquals(record1, record2); + assertEquals(record2, record1); + // Different appId + assertNotEquals(record1, record3); + // Different hostName + assertNotEquals(record1, record4); + // Different hostIp + assertNotEquals(record1, record5); + // Different port + assertNotEquals(record1, record6); + // Different externalId + assertNotEquals(record1, record7); + // Different computeName + assertNotEquals(record1, record8); + // Self equality + assertEquals(record1, record1); + // Null equality + assertNotEquals(null, record1); + // Different type + assertNotEquals("not an AMRecord", record1); + } + + @Test + public void testHashCode() { + ApplicationId appId1 = ApplicationId.newInstance(12345L, 1); + ApplicationId appId2 = ApplicationId.newInstance(12345L, 1); + + AMRecord record1 = new AMRecord(appId1, "host1", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record2 = new AMRecord(appId2, "host1", "192.168.1.1", 8080, "ext1", "compute1"); + AMRecord record3 = new AMRecord(appId1, "host2", "192.168.1.1", 8080, "ext1", "compute1"); + + // Equal objects should have same hashCode + 
assertEquals(record1.hashCode(), record2.hashCode()); + } + + @Test + public void testToString() { + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + String str = record.toString(); + + assertNotNull(str); + // Validate actual JSON-like snippets from the string + assertTrue("Should contain appId=value snippet", str.contains("appId=" + appId.toString())); + assertTrue("Should contain hostName=value snippet", str.contains("hostName=" + hostName)); + assertTrue("Should contain hostIp=value snippet", str.contains("hostIp=" + hostIp)); + assertTrue("Should contain port=value snippet", str.contains("port=" + port)); + assertTrue("Should contain externalId=value snippet", str.contains("externalId=" + externalId)); + assertTrue("Should contain computeName=value snippet", str.contains("computeName=" + computeName)); + } + + @Test + public void testRemoveFromCacheByDeserializedRecordAppId() throws Exception { + ConcurrentHashMap<ApplicationId, AMRecord> amRecordCache = new ConcurrentHashMap<>(); + + ApplicationId appId = ApplicationId.newInstance(12345L, 1); + String hostName = "test-host.example.com"; + String hostIp = "192.168.1.100"; + int port = 8080; + String externalId = "external-123"; + String computeName = "test-compute"; + + AMRecord record = new AMRecord(appId, hostName, hostIp, port, externalId, computeName); + amRecordCache.put(appId, record); + + assertEquals(1, amRecordCache.size()); + + AMRecord deserialized = AMRegistryUtils.jsonStringToRecord(AMRegistryUtils.recordToJsonString(record)); + amRecordCache.remove(deserialized.getApplicationId()); + + assertEquals(0, amRecordCache.size()); + } +} diff --git a/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkConfig.java 
b/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkConfig.java new file mode 100644 index 0000000000..7a4ab20db4 --- /dev/null +++ b/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkConfig.java @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.client.registry.zookeeper; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.util.concurrent.TimeUnit; + +import org.apache.curator.RetryPolicy; +import org.apache.curator.framework.CuratorFramework; +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.dag.api.TezConfiguration; + +import org.junit.Test; + +public class TestZkConfig { + + @Test + public void testBasicConfiguration() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/test-namespace"); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals("localhost:2181", zkConfig.getZkQuorum()); + assertEquals("/tez-external-sessions/test-namespace", zkConfig.getZkNamespace()); + } + + @Test + public void testDefaultValues() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + + ZkConfig zkConfig = new ZkConfig(conf); + + Configuration defaultConf = new Configuration(); + long expectedBackoffSleep = defaultConf.getTimeDuration( + TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP, + TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP_DEFAULT, TimeUnit.MILLISECONDS); + long expectedSessionTimeout = defaultConf.getTimeDuration( + TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT, + TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); + long expectedConnectionTimeout = defaultConf.getTimeDuration( + TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT, + 
TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); + + assertEquals(expectedBackoffSleep, zkConfig.getCuratorBackoffSleepMs()); + assertEquals(TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES_DEFAULT, zkConfig.getCuratorMaxRetries()); + assertEquals(expectedSessionTimeout, zkConfig.getSessionTimeoutMs()); + assertEquals(expectedConnectionTimeout, zkConfig.getConnectionTimeoutMs()); + } + + @Test + public void testCustomConfigurationValues() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "zk1:2181,zk2:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/custom-namespace"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP, "2000ms"); + conf.setInt(TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES, 5); + conf.set(TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT, "200000ms"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT, "20000ms"); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals("zk1:2181,zk2:2181", zkConfig.getZkQuorum()); + assertEquals("/tez-external-sessions/custom-namespace", zkConfig.getZkNamespace()); + assertEquals(2000, zkConfig.getCuratorBackoffSleepMs()); + assertEquals(5, zkConfig.getCuratorMaxRetries()); + assertEquals(200000, zkConfig.getSessionTimeoutMs()); + assertEquals(20000, zkConfig.getConnectionTimeoutMs()); + } + + @Test + public void testNamespaceWithLeadingSlash() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/namespace-with-slash"); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals("/tez-external-sessions/namespace-with-slash", zkConfig.getZkNamespace()); + } + + @Test + public void testNamespaceWithoutLeadingSlash() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + 
conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "namespace-without-slash"); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals("/tez-external-sessions/namespace-without-slash", zkConfig.getZkNamespace()); + } + + @Test + public void testComputeGroupsDisabled() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/test-namespace"); + conf.setBoolean(TezConfiguration.TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS, false); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals("/tez-external-sessions/test-namespace", zkConfig.getZkNamespace()); + } + + @Test + public void testComputeGroupsEnabledWithoutEnvVar() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/test-namespace"); + conf.setBoolean(TezConfiguration.TEZ_AM_REGISTRY_ENABLE_COMPUTE_GROUPS, true); + + // When compute groups are enabled but env var is not set, namespace should not include sub-namespace + ZkConfig zkConfig = new ZkConfig(conf); + + // Namespace should start with base namespace (env var not set, so no sub-namespace added) + assertEquals("/tez-external-sessions/test-namespace", zkConfig.getZkNamespace()); + } + + @Test + public void testGetRetryPolicy() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP, "1500ms"); + conf.setInt(TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES, 4); + + ZkConfig zkConfig = new ZkConfig(conf); + RetryPolicy retryPolicy = zkConfig.getRetryPolicy(); + + assertNotNull(retryPolicy); + // Verify it's an ExponentialBackoffRetry instance + assertEquals("org.apache.curator.retry.ExponentialBackoffRetry", retryPolicy.getClass().getName()); + } + + @Test + public void 
testTimeUnitSupport() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + // Test different time units + conf.set(TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP, "2s"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT, "3m"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT, "5s"); + + ZkConfig zkConfig = new ZkConfig(conf); + + assertEquals(2000, zkConfig.getCuratorBackoffSleepMs()); + assertEquals(180000, zkConfig.getSessionTimeoutMs()); + assertEquals(5000, zkConfig.getConnectionTimeoutMs()); + + // Unit-less values should default to milliseconds + conf.set(TezConfiguration.TEZ_AM_CURATOR_BACKOFF_SLEEP, "2000"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_SESSION_TIMEOUT, "300000"); + conf.set(TezConfiguration.TEZ_AM_CURATOR_CONNECTION_TIMEOUT, "15000"); + + ZkConfig unitlessConfig = new ZkConfig(conf); + assertEquals(2000, unitlessConfig.getCuratorBackoffSleepMs()); + assertEquals(300000, unitlessConfig.getSessionTimeoutMs()); + assertEquals(15000, unitlessConfig.getConnectionTimeoutMs()); + } + + @Test + public void testCreateCuratorFramework() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + + ZkConfig zkConfig = new ZkConfig(conf); + CuratorFramework curator = zkConfig.createCuratorFramework(); + + assertNotNull(curator); + assertEquals(zkConfig.getZkQuorum(), curator.getZookeeperClient().getCurrentConnectionString()); + } + + @Test(expected = IllegalArgumentException.class) + public void testNullZkQuorum() { + TezConfiguration conf = new TezConfiguration(); + // Don't set zkQuorum + new ZkConfig(conf); + } + + @Test(expected = IllegalArgumentException.class) + public void testEmptyZkQuorum() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, ""); + new ZkConfig(conf); + } + + @Test(expected = IllegalArgumentException.class) + public 
void testNullNamespace() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, null); + new ZkConfig(conf); + } + + @Test(expected = IllegalArgumentException.class) + public void testEmptyNamespace() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, ""); + new ZkConfig(conf); + } + + @Test + public void testDefaultNamespace() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:2181"); + // Don't set namespace, should use default + ZkConfig zkConfig = new ZkConfig(conf); + assertEquals("/tez-external-sessions" + TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE_DEFAULT, + zkConfig.getZkNamespace()); + } +} diff --git a/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkFrameworkClient.java b/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkFrameworkClient.java new file mode 100644 index 0000000000..efb5f98733 --- /dev/null +++ b/tez-api/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkFrameworkClient.java @@ -0,0 +1,236 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.client.registry.zookeeper; + +import static org.junit.Assert.*; + +import java.io.File; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.io.IOUtils; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.registry.client.binding.RegistryUtils; +import org.apache.hadoop.registry.client.types.ServiceRecord; +import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.client.api.YarnClientApplication; +import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.zookeeper.CreateMode; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit tests for {@link ZkFrameworkClient}. + *

+ * This test class validates the ZooKeeper-based framework client that discovers + * and communicates with Application Masters through ZooKeeper registry. + *

+ */
+public class TestZkFrameworkClient {
+  private static final Logger LOG = LoggerFactory.getLogger(TestZkFrameworkClient.class);
+  private static final File TEST_DIR = new File(System.getProperty("test.build.data", "target"),
+      TestZkFrameworkClient.class.getName()).getAbsoluteFile();
+
+  /** Upper bound, in milliseconds, on waiting for the client to report its ZK registry as initialized. */
+  private static final int ZK_INIT_TIMEOUT_MS = 1000;
+  /** Polling interval, in milliseconds, used while waiting for that initialization. */
+  private static final int ZK_INIT_POLL_INTERVAL_MS = 100;
+
+  private TestingServer zkServer;
+  private ZkFrameworkClient zkFrameworkClient;
+  private CuratorFramework curatorClient;
+
+  /**
+   * Starts a ZooKeeper test server before each test.
+   */
+  @Before
+  public void setup() throws Exception {
+    zkServer = new TestingServer(true);
+    LOG.info("Started ZooKeeper test server on port: {}", zkServer.getPort());
+  }
+
+  /**
+   * Closes the client under test, the Curator client used by {@link #registerMockAM} (if any)
+   * and the ZooKeeper test server.
+   */
+  @After
+  public void teardown() throws Exception {
+    try {
+      if (zkFrameworkClient != null) {
+        zkFrameworkClient.close();
+      }
+    } finally {
+      // Release the Curator client and the test server even when closing the framework
+      // client throws, so a failure there cannot leave them open.
+      IOUtils.closeQuietly(curatorClient);
+      IOUtils.closeQuietly(zkServer);
+    }
+  }
+
+  /**
+   * Tests initialization and lifecycle methods of ZkFrameworkClient.
+   */
+  @Test
+  public void testInitAndLifecycle() throws Exception {
+    TezConfiguration tezConf = createTezConf();
+
+    zkFrameworkClient = new ZkFrameworkClient();
+    zkFrameworkClient.init(tezConf);
+
+    assertFalse("Client should not be running after init", zkFrameworkClient.isRunning());
+
+    zkFrameworkClient.start();
+    assertTrue("Client should be running after start", zkFrameworkClient.isRunning());
+
+    zkFrameworkClient.stop();
+    assertFalse("Client should not be running after stop", zkFrameworkClient.isRunning());
+  }
+
+  /**
+   * Tests retrieving application report when AM is registered in ZooKeeper.
+   */
+  @Test
+  public void testGetApplicationReportWithRegisteredAM() throws Exception {
+    TezConfiguration tezConf = createTezConf();
+
+    // Register a mock AM in ZooKeeper
+    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
+    String testHostName = "test-host";
+    int testPort = 12345;
+    registerMockAM(tezConf, appId, testHostName, testPort);
+
+    startClientAndAwaitZk(tezConf);
+
+    ApplicationReport report = zkFrameworkClient.getApplicationReport(appId);
+
+    assertNotNull("Application report should not be null", report);
+    assertEquals("Application ID should match", appId, report.getApplicationId());
+    assertEquals("Host should match", testHostName, report.getHost());
+    assertEquals("Port should match", testPort, report.getRpcPort());
+    assertEquals("AM host should be cached", testHostName, zkFrameworkClient.getAmHost());
+    assertEquals("AM port should be cached", testPort, zkFrameworkClient.getAmPort());
+  }
+
+  /**
+   * Tests retrieving application report when AM is not found in ZooKeeper.
+   */
+  @Test
+  public void testGetApplicationReportWithMissingAM() throws Exception {
+    TezConfiguration tezConf = createTezConf();
+
+    startClientAndAwaitZk(tezConf);
+
+    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
+
+    ApplicationReport report = zkFrameworkClient.getApplicationReport(appId);
+
+    assertNotNull("Application report should not be null", report);
+    assertEquals("Application ID should match", appId, report.getApplicationId());
+    assertEquals("Final status should be FAILED", FinalApplicationStatus.FAILED,
+        report.getFinalApplicationStatus());
+    assertTrue("Diagnostics should mention missing AM",
+        report.getDiagnostics().contains("AM record not found"));
+  }
+
+  /**
+   * Tests creating application from AM record.
+   */
+  @Test
+  public void testCreateApplication() throws Exception {
+    TezConfiguration tezConf = createTezConf();
+
+    // Register a mock AM in ZooKeeper
+    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
+    registerMockAM(tezConf, appId, "test-host", 12345);
+
+    startClientAndAwaitZk(tezConf);
+
+    // Need to call getApplicationReport first to populate amRecord
+    zkFrameworkClient.getApplicationReport(appId);
+
+    YarnClientApplication clientApp = zkFrameworkClient.createApplication();
+
+    assertNotNull("YarnClientApplication should not be null", clientApp);
+    assertNotNull("ApplicationSubmissionContext should not be null",
+        clientApp.getApplicationSubmissionContext());
+    assertEquals("Application ID should match",
+        appId, clientApp.getApplicationSubmissionContext().getApplicationId());
+    assertNotNull("GetNewApplicationResponse should not be null", clientApp.getNewApplicationResponse());
+    assertEquals("Response application ID should match",
+        appId, clientApp.getNewApplicationResponse().getApplicationId());
+  }
+
+  /**
+   * Tests kill application method.
+   */
+  @Test
+  public void testKillApplication() throws Exception {
+    TezConfiguration tezConf = createTezConf();
+
+    // Poll for ZK registry initialization instead of sleeping for a fixed time, so the test
+    // neither wastes time nor races against a slow start (same wait as the other tests).
+    startClientAndAwaitZk(tezConf);
+
+    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
+
+    // Should not throw exception
+    zkFrameworkClient.killApplication(appId);
+  }
+
+  /**
+   * Creates, initializes and starts the client under test, then polls until it reports that its
+   * ZooKeeper registry is initialized.
+   *
+   * @param tezConf configuration handed to {@code init}
+   * @throws Exception if the client fails to start or the wait does not succeed in time
+   */
+  private void startClientAndAwaitZk(TezConfiguration tezConf) throws Exception {
+    zkFrameworkClient = new ZkFrameworkClient();
+    zkFrameworkClient.init(tezConf);
+    zkFrameworkClient.start();
+
+    LambdaTestUtils.await(ZK_INIT_TIMEOUT_MS, ZK_INIT_POLL_INTERVAL_MS,
+        () -> zkFrameworkClient.isZkInitialized());
+  }
+
+  /**
+   * Builds a configuration that points at the test ZooKeeper server and uses a namespace derived
+   * from the current time.
+   */
+  private TezConfiguration createTezConf() {
+    TezConfiguration tezConf = new TezConfiguration();
+    tezConf.set(TezConfiguration.TEZ_FRAMEWORK_MODE, "STANDALONE_ZOOKEEPER");
+    tezConf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:" + zkServer.getPort());
+    tezConf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/tez-test-" + System.currentTimeMillis());
+    tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true);
+    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.toString());
+    return tezConf;
+  }
+
+  /**
+   * Writes an AM record for {@code appId} straight into ZooKeeper as an ephemeral node at
+   * {@code <namespace>/<appId>}. The Curator client is kept in {@link #curatorClient} and is
+   * closed in {@link #teardown()}.
+   */
+  private void registerMockAM(TezConfiguration tezConf, ApplicationId appId, String hostName, int port)
+      throws Exception {
+    // Create AM record and publish it directly to ZooKeeper
+    AMRecord amRecord = new AMRecord(appId, hostName, "127.0.0.1", port, "test-external-id", "test-compute");
+    ServiceRecord serviceRecord = amRecord.toServiceRecord();
+
+    RegistryUtils.ServiceRecordMarshal marshal = new RegistryUtils.ServiceRecordMarshal();
+    String json = marshal.toJson(serviceRecord);
+
+    // Use Curator to write directly to ZooKeeper
+    ZkConfig zkConfig = new ZkConfig(tezConf);
+    curatorClient = zkConfig.createCuratorFramework();
+    curatorClient.start();
+
+    // Wait for connection
+    curatorClient.blockUntilConnected();
+
+    String namespace = zkConfig.getZkNamespace();
+    String path = namespace + "/" + appId.toString();
+
+    // Create parent directories if needed
+    curatorClient.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL)
+        .forPath(path, json.getBytes(StandardCharsets.UTF_8));
+
+    LOG.info("Registered mock AM to ZK path: {}", path);
+  }
+}
diff --git a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
index 661d77320e..cb7581981b 100644
--- a/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
+++ b/tez-common/src/main/java/org/apache/tez/common/TezUtilsInternal.java
@@ -46,8 +46,10 @@
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.log4j.Appender;
 import org.apache.log4j.PatternLayout;
+import org.apache.tez.client.TezClientUtils;
 import org.apache.tez.common.io.NonSyncByteArrayOutputStream;
 import org.apache.tez.dag.api.DagTypeConverters;
+import org.apache.tez.dag.api.TezConfiguration;
 import org.apache.tez.dag.api.TezConstants;
 import org.apache.tez.dag.api.TezUncheckedException;
 import org.apache.tez.dag.api.records.DAGProtos;
@@ -58,6 +60,7 @@
 import org.apache.tez.dag.records.TezTaskAttemptID;
 import org.apache.tez.dag.records.TezVertexID;
 import org.apache.tez.hadoop.shim.HadoopShim;
+import org.apache.tez.serviceplugins.api.ServicePluginsDescriptor;
 import org.apache.tez.serviceplugins.api.TaskAttemptEndReason;
 import org.apache.tez.util.StopWatch;
 
@@ -90,6 +93,35 @@ public static Configuration readTezConfigurationXml(InputStream is) {
     return configuration;
   }
 
+  /**
+   * Builds a {@link ConfigurationProto} from configuration resources on the system classpath.
+   *
+   * <p>Reads {@link TezConfiguration#TEZ_SITE_XML}, merges in every resource listed under
+   * {@link TezConfiguration#TEZ_AM_STANDALONE_CONFS}, and combines the result with the plugins
+   * descriptor parsed from {@link TezConstants#SERVICE_PLUGINS_DESCRIPTOR_JSON}.</p>
+   *
+   * <p>NOTE(review): {@code ClassLoader.getSystemResourceAsStream} returns {@code null} for a
+   * missing resource; how the helpers called here react to a null stream is not visible in this
+   * method - confirm before relying on optional resources.</p>
+   *
+   * @return the proto produced by {@link TezClientUtils#createFinalConfProtoForApp}
+   * @throws IOException if a callee or closing one of the streams reports an I/O failure
+   */
+  public static ConfigurationProto loadConfProtoFromText() throws IOException {
+    try (InputStream cis = ClassLoader.getSystemResourceAsStream(TezConfiguration.TEZ_SITE_XML);
+         InputStream sis = ClassLoader.getSystemResourceAsStream(TezConstants.SERVICE_PLUGINS_DESCRIPTOR_JSON)) {
+      Configuration confFromXml = TezUtilsInternal.readTezConfigurationXml(cis);
+      for (String confFile : confFromXml.getTrimmedStringCollection(TezConfiguration.TEZ_AM_STANDALONE_CONFS)) {
+        try (InputStream additionalInput = ClassLoader.getSystemResourceAsStream(confFile)) {
+          Configuration additionalConfFromXml = TezUtilsInternal.readTezConfigurationXml(additionalInput);
+          confFromXml.addResource(additionalConfFromXml);
+        }
+      }
+      ServicePluginsDescriptor pluginsDescriptor = TezClientUtils.createPluginsDescriptorFromJSON(sis);
+      return TezClientUtils.createFinalConfProtoForApp(confFromXml, pluginsDescriptor);
+    }
+  }
+
   public static void addUserSpecifiedTezConfiguration(Configuration conf,
       List kvPairList) {
     if (kvPairList != null && !kvPairList.isEmpty()) {
diff --git a/tez-dag/findbugs-exclude.xml b/tez-dag/findbugs-exclude.xml
index 9ee1b3d946..2c878bb6fe 100644
--- a/tez-dag/findbugs-exclude.xml
+++ b/tez-dag/findbugs-exclude.xml
@@ -273,4 +273,11 @@ + + + + + + +
diff --git a/tez-dag/pom.xml b/tez-dag/pom.xml
index a765fb708a..ac5936206e 100644
--- a/tez-dag/pom.xml
+++ b/tez-dag/pom.xml
@@ -83,6 +83,10 @@ org.apache.hadoop hadoop-yarn-client
+ + org.apache.hadoop + hadoop-registry + org.apache.hadoop hadoop-yarn-server-web-proxy @@ -148,6 +152,11 @@ test-jar test + + org.apache.curator + curator-test + test + @@ -203,6 +212,7 @@ ${test.jvm.args} ${test.log.dir} + test-external-id diff --git a/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java b/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java index 79f9f15a64..9b65f2f452 100644 --- a/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java +++ b/tez-dag/src/main/java/org/apache/tez/client/LocalClient.java @@ -415,7 +415,7 @@ protected DAGAppMaster createDAGAppMaster(ApplicationAttemptId applicationAttemp versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto) : new DAGAppMaster(applicationAttemptId, cId, currentHost, nmPort, nmHttpPort, SystemClock.getInstance(), appSubmitTime, isSession, userDir, localDirs, logDirs, - versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto, null); + versionInfo.getVersion(), credentials, jobUserName, amPluginDescriptorProto); } @Override diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java index 20d1563e4e..4c5a0039e6 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/DAGClientServer.java @@ -66,7 +66,8 @@ public DAGClientServer(DAGClientHandler realInstance, public void serviceStart() { try { Configuration conf = getConfig(); - InetSocketAddress addr = new InetSocketAddress(0); + int rpcPort = conf.getInt(TezConfiguration.TEZ_AM_RPC_PORT, TezConfiguration.TEZ_AM_RPC_PORT_DEFAULT); + InetSocketAddress addr = new InetSocketAddress(rpcPort); DAGClientAMProtocolBlockingPBServerImpl service = new DAGClientAMProtocolBlockingPBServerImpl(realInstance, stagingFs); @@ -82,7 +83,7 @@ public void serviceStart() { server = 
createServer(DAGClientAMProtocolBlockingPB.class, addr, conf, numHandlers, blockingService, TezConfiguration.TEZ_AM_CLIENT_AM_PORT_RANGE); - + // Enable service authorization? if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, @@ -116,7 +117,7 @@ public void serviceStop() { public InetSocketAddress getBindAddress() { return bindAddress; } - + public void setClientAMSecretKey(ByteBuffer key) { if (key != null && key.hasRemaining()) { // non-empty key. must be useful diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/ZkAMRegistry.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/ZkAMRegistry.java new file mode 100644 index 0000000000..005867ce16 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/ZkAMRegistry.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.api.client.registry.zookeeper; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.curator.RetryLoop; +import org.apache.curator.RetryPolicy; +import org.apache.curator.framework.CuratorFramework; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.client.registry.AMRegistry; +import org.apache.tez.client.registry.AMRegistryUtils; +import org.apache.tez.client.registry.zookeeper.ZkConfig; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Curator/Zookeeper impl of AMRegistry (for internal use only) + * Clients should use org.apache.tez.dag.api.client.registry.zookeeper.ZkAMRegistryClient instead. 
+ */
+@InterfaceAudience.Private
+public class ZkAMRegistry implements AMRegistry {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ZkAMRegistry.class);
+
+  /** Records added through this instance; {@link #close()} removes them from ZooKeeper. */
+  private final List<AMRecord> amRecords = Collections.synchronizedList(new ArrayList<>());
+  private final String externalId;
+
+  private CuratorFramework client = null;
+  private String namespace = null;
+  private ZkConfig zkConfig = null;
+
+  /**
+   * @param externalId external id stamped on every record built by {@link #createAmRecord}
+   */
+  public ZkAMRegistry(String externalId) {
+    this.externalId = externalId;
+  }
+
+  /**
+   * Builds the Curator client and resolves the ZooKeeper namespace from {@code conf}.
+   * The client is not started here; call {@link #start()} afterwards.
+   *
+   * @param conf configuration wrapped in a {@link ZkConfig}
+   */
+  public void init(Configuration conf) {
+    zkConfig = new ZkConfig(conf);
+    this.client = zkConfig.createCuratorFramework();
+    this.namespace = zkConfig.getZkNamespace();
+    LOG.info("ZkAMRegistry initialized");
+  }
+
+  /**
+   * Starts the Curator client created by {@link #init(Configuration)}.
+   */
+  public void start() throws Exception {
+    client.start();
+    LOG.info("ZkAMRegistry started");
+  }
+
+  /**
+   * Shuts down the service by removing all {@link AMRecord} entries from ZooKeeper
+   * that were created by this instance.
+   *
+   * <p>A failure to remove one record is logged and does not stop the remaining removals.
+   * After all removal attempts, the ZooKeeper client is closed (if {@link #init} created one)
+   * and the shutdown is logged.</p>
+   */
+  public void close() {
+    // Iterate over a snapshot: remove() mutates amRecords.
+    for (AMRecord amRecord : new ArrayList<>(amRecords)) {
+      try {
+        remove(amRecord);
+      } catch (Exception e) {
+        LOG.warn("Exception while trying to remove AMRecord: {}", amRecord, e);
+      }
+    }
+    // close() may be reached without init() having run; do not fail shutdown with an NPE.
+    if (client != null) {
+      client.close();
+    }
+    LOG.info("ZkAMRegistry shutdown");
+  }
+
+  /**
+   * Serializes the record to JSON and stores the bytes in the znode at
+   * {@code <namespace>/<applicationId>}. An existing node is overwritten; a missing node is
+   * created as an ephemeral node, together with any missing parents.
+   *
+   * @param server the record to publish
+   * @throws Exception if serialization or the ZooKeeper operation fails
+   */
+  @Override
+  public void add(AMRecord server) throws Exception {
+    final String path = pathFor(server);
+    final byte[] payload = AMRegistryUtils.recordToJsonString(server).getBytes(StandardCharsets.UTF_8);
+    try {
+      client.setData().forPath(path, payload);
+    } catch (KeeperException.NoNodeException nne) {
+      client.create().creatingParentContainersIfNeeded().withMode(CreateMode.EPHEMERAL)
+          .forPath(path, payload);
+    }
+    // Logged on both the update and the create path.
+    LOG.info("Added AMRecord to zkpath {}", path);
+    amRecords.add(server);
+  }
+
+  /**
+   * Forgets the record locally and deletes its znode.
+   *
+   * @param server the record to remove
+   * @throws Exception if the ZooKeeper delete fails
+   */
+  @Override
+  public void remove(AMRecord server) throws Exception {
+    amRecords.remove(server);
+    final String path = pathFor(server);
+    client.delete().forPath(path);
+    LOG.info("Deleted AMRecord from zkpath {}", path);
+  }
+
+  /**
+   * Reserves a new {@link ApplicationId} by creating an empty ephemeral znode for it under the
+   * namespace. The cluster timestamp is the namespace node's creation time and the id is one
+   * above the highest id currently present among the namespace's children.
+   *
+   * <p>If another party creates the same node first, the attempt is repeated, at most
+   * {@code zkConfig.getCuratorMaxRetries()} times and only while the configured
+   * {@link RetryPolicy} allows it.</p>
+   *
+   * @return the reserved application id
+   * @throws RuntimeException if no unique id could be reserved within the allowed attempts
+   * @throws Exception if a ZooKeeper operation fails for another reason
+   */
+  @Override
+  public ApplicationId generateNewId() throws Exception {
+    createNamespaceIfNotExists();
+    long namespaceCreationTime = getNamespaceCreationTime();
+
+    long startTime = System.currentTimeMillis();
+    RetryPolicy retryPolicy = zkConfig.getRetryPolicy();
+    int maxTries = zkConfig.getCuratorMaxRetries();
+    int tryId = 0;
+    int tries = 0;
+    while (tries < maxTries) {
+      tries++;
+      List<String> children = client.getChildren().forPath(namespace);
+      if (children != null && !children.isEmpty()) {
+        int highest = highestApplicationId(children);
+        if (highest >= 0) {
+          tryId = highest + 1;
+        }
+      }
+      ApplicationId tryAppId = ApplicationId.newInstance(namespaceCreationTime, tryId);
+      try {
+        client
+            .create()
+            .withMode(CreateMode.EPHEMERAL)
+            .forPath(namespace + "/" + tryAppId.toString(), new byte[0]);
+        LOG.debug("Successfully created application id {} for namespace {}", tryAppId, namespace);
+        return tryAppId;
+      } catch (KeeperException.NodeExistsException nodeExists) {
+        LOG.info("Node already exists in ZK for application id {}", tryId);
+        tryId++;
+        long elapsedTime = System.currentTimeMillis() - startTime;
+        // allowRetry sleeps according to the policy and says whether another attempt is permitted.
+        if (!retryPolicy.allowRetry(tries, elapsedTime, RetryLoop.getDefaultRetrySleeper())) {
+          break;
+        }
+      }
+    }
+    throw new RuntimeException("Could not obtain unique ApplicationId after "
+        + tries + " tries");
+  }
+
+  /**
+   * Builds an {@link AMRecord} for the given coordinates, stamped with this registry's external id.
+   */
+  @Override
+  public AMRecord createAmRecord(ApplicationId appId, String hostName, String hostIp, int port, String computeName) {
+    return new AMRecord(appId, hostName, hostIp, port, externalId, computeName);
+  }
+
+  /**
+   * Returns the highest numeric application id among the given znode names, or {@code -1} if
+   * none of them parses as an {@link ApplicationId}.
+   *
+   * <p>The comparison is numeric on purpose: ordering the names as strings stops matching the
+   * numeric order once ids differ in digit count.</p>
+   */
+  private static int highestApplicationId(List<String> children) {
+    int highest = -1;
+    for (String child : children) {
+      try {
+        highest = Math.max(highest, ApplicationId.fromString(child).getId());
+      } catch (IllegalArgumentException notAnAppId) {
+        // Not an application znode; it cannot collide with a generated id, so skip it.
+        LOG.debug("Ignoring znode that is not an application id: {}", child);
+      }
+    }
+    return highest;
+  }
+
+  /**
+   * Returns the creation time of the namespace znode.
+   *
+   * @throws IllegalStateException if the namespace znode does not exist
+   */
+  private long getNamespaceCreationTime() throws Exception {
+    Stat stat = client.checkExists().forPath(namespace);
+    if (stat == null) {
+      // checkExists() yields null for a missing node; fail with a clear message, not an NPE.
+      throw new IllegalStateException("ZK namespace does not exist: " + namespace);
+    }
+    return stat.getCtime();
+  }
+
+  /**
+   * Creates the namespace znode (and missing parents); an already existing namespace is reused.
+   */
+  private void createNamespaceIfNotExists() throws Exception {
+    try {
+      client.create().creatingParentContainersIfNeeded().forPath(namespace);
+    } catch (KeeperException.NodeExistsException nodeExists) {
+      LOG.info("Namespace already exists, will use existing: {}", namespace);
+    }
+  }
+
+  /** Returns the znode path of a record: {@code <namespace>/<applicationId>}. */
+  private String pathFor(AMRecord record) {
+    return namespace + "/" + record.getApplicationId().toString();
+  }
+}
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/package-info.java b/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/package-info.java
new file mode 100644
index 0000000000..be8343b7c5
--- /dev/null
+++ b/tez-dag/src/main/java/org/apache/tez/dag/api/client/registry/zookeeper/package-info.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Public +@Evolving +package org.apache.tez.dag.api.client.registry.zookeeper; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java index 9afd1fb873..c3258b426a 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/AppContext.java @@ -38,6 +38,7 @@ import org.apache.tez.dag.app.rm.node.AMNodeTracker; import org.apache.tez.dag.history.HistoryEventHandler; import org.apache.tez.dag.records.TezDAGID; +import org.apache.tez.frameworkplugins.AMExtensions; import org.apache.tez.hadoop.shim.HadoopShim; import com.google.common.util.concurrent.ListeningExecutorService; @@ -140,4 +141,6 @@ public interface AppContext { public HadoopShim getHadoopShim(); public DAGRecoveryData getDAGRecoveryData(); + + AMExtensions getAmExtensions(); } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java index ec4a89be03..22497247dd 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java +++ 
b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java @@ -20,12 +20,12 @@ -import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; +import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; @@ -46,6 +46,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Objects; +import java.util.Optional; import java.util.Random; import java.util.Set; import java.util.Timer; @@ -78,7 +79,6 @@ import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -90,7 +90,6 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.log4j.helpers.ThreadLocalMap; @@ -98,6 +97,8 @@ import org.apache.tez.client.CallerContext; import org.apache.tez.client.TezClientUtils; import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.client.registry.AMRegistry; +import org.apache.tez.client.registry.zookeeper.ZkConfig; import org.apache.tez.common.AsyncDispatcher; import org.apache.tez.common.AsyncDispatcherConcurrent; import org.apache.tez.common.ContainerSignatureMatcher; @@ -126,10 +127,8 @@ import org.apache.tez.dag.api.UserPayload; import org.apache.tez.dag.api.client.DAGClientHandler; import org.apache.tez.dag.api.client.DAGClientServer; 
-import org.apache.tez.dag.api.client.registry.AMRegistry; import org.apache.tez.dag.api.records.DAGProtos; import org.apache.tez.dag.api.records.DAGProtos.AMPluginDescriptorProto; -import org.apache.tez.dag.api.records.DAGProtos.ConfigurationProto; import org.apache.tez.dag.api.records.DAGProtos.DAGPlan; import org.apache.tez.dag.api.records.DAGProtos.PlanLocalResourcesProto; import org.apache.tez.dag.api.records.DAGProtos.VertexPlan; @@ -183,9 +182,12 @@ import org.apache.tez.dag.records.TezDAGID; import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezVertexID; -import org.apache.tez.dag.utils.AMRegistryUtils; import org.apache.tez.dag.utils.RelocalizationUtils; import org.apache.tez.dag.utils.Simple2LevelVersionComparator; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.FrameworkUtils; +import org.apache.tez.frameworkplugins.ServerFrameworkService; +import org.apache.tez.frameworkplugins.yarn.YarnServerFrameworkService; import org.apache.tez.hadoop.shim.HadoopShim; import org.apache.tez.hadoop.shim.HadoopShimsLoader; import org.apache.tez.runtime.hook.TezDAGHook; @@ -225,6 +227,7 @@ public class DAGAppMaster extends AbstractService { private static final Logger LOG = LoggerFactory.getLogger(DAGAppMaster.class); + private ServerFrameworkService frameworkService; /** * Priority of the DAGAppMaster shutdown hook. 
@@ -243,7 +246,6 @@ public class DAGAppMaster extends AbstractService { private String appName; private final ApplicationAttemptId appAttemptID; private final ContainerId containerID; - private String amUUID; private final String nmHost; private final int nmPort; private final int nmHttpPort; @@ -346,8 +348,7 @@ public DAGAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, Clock clock, long appSubmitTime, boolean isSession, String workingDirectory, String [] localDirs, String[] logDirs, String clientVersion, - Credentials credentials, String jobUserName, AMPluginDescriptorProto pluginDescriptorProto, - String amUUID) { + Credentials credentials, String jobUserName, AMPluginDescriptorProto pluginDescriptorProto) { super(DAGAppMaster.class.getName()); this.mdcContext = LoggingUtils.setupLog4j(); this.clock = clock; @@ -355,7 +356,6 @@ public DAGAppMaster(ApplicationAttemptId applicationAttemptId, this.appSubmitTime = appSubmitTime; this.appAttemptID = applicationAttemptId; this.containerID = containerId; - this.amUUID = amUUID; this.nmHost = nmHost; this.nmPort = nmPort; this.nmHttpPort = nmHttpPort; @@ -429,6 +429,8 @@ private long getAMGCTime() { protected void serviceInit(final Configuration conf) throws Exception { this.amConf = conf; + this.frameworkService = getFrameworkService(conf); + initResourceCalculatorPlugins(); this.hadoopShim = new HadoopShimsLoader(this.amConf).getHadoopShim(); @@ -513,10 +515,14 @@ protected void serviceInit(final Configuration conf) throws Exception { jobTokenSecretManager = new JobTokenSecretManager(amConf); - sessionToken = - TokenCache.getSessionToken(amCredentials); + sessionToken = frameworkService.getAMExtensions().getSessionToken( + appAttemptID, jobTokenSecretManager, amCredentials); + if (sessionToken == null) { - throw new RuntimeException("Could not find session token in AM Credentials"); + sessionToken = TokenCache.getSessionToken(amCredentials); + if 
(sessionToken == null) { + throw new RuntimeException("Could not find session token in AM Credentials"); + } } // Prepare the TaskAttemptListener server for authentication of Containers @@ -524,8 +530,6 @@ protected void serviceInit(final Configuration conf) throws Exception { jobTokenSecretManager.addTokenForJob( appAttemptID.getApplicationId().toString(), sessionToken); - - //service to handle requests to TaskUmbilicalProtocol taskCommunicatorManager = createTaskCommunicatorManager(context, taskHeartbeatHandler, containerHeartbeatHandler, taskCommunicatorDescriptors); @@ -602,31 +606,21 @@ protected void serviceInit(final Configuration conf) throws Exception { if (!versionMismatch) { if (isSession) { - try (BufferedInputStream sessionResourcesStream = - new BufferedInputStream( - new FileInputStream(new File(workingDirectory, - TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME)))) { - PlanLocalResourcesProto amLocalResourceProto = PlanLocalResourcesProto - .parseDelimitedFrom(sessionResourcesStream); - amResources.putAll(DagTypeConverters - .convertFromPlanLocalResources(amLocalResourceProto)); - } + PlanLocalResourcesProto amLocalResourceProto = + frameworkService.getAMExtensions().getAdditionalSessionResources(workingDirectory); + amResources.putAll(DagTypeConverters.convertFromPlanLocalResources(amLocalResourceProto)); } } int threadCount = conf.getInt(TezConfiguration.TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT, TezConfiguration.TEZ_AM_DAG_APPCONTEXT_THREAD_COUNT_LIMIT_DEFAULT); // NOTE: LinkedBlockingQueue does not have a capacity Limit and can thus - // occupy large memory chunks when numerous Runables are pending for execution + // occupy large memory chunks when numerous Runnables are pending for execution ExecutorService rawExecutor = Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("App Shared Pool - #%d").build()); execService = MoreExecutors.listeningDecorator(rawExecutor); - AMRegistry amRegistry = 
AMRegistryUtils.createAMRegistry(conf); - initAmRegistry(appAttemptID.getApplicationId(), amUUID, amRegistry, clientRpcServer); - addIfService(amRegistry, false); - initServices(conf); super.serviceInit(conf); @@ -649,26 +643,47 @@ protected void serviceInit(final Configuration conf) throws Exception { } } + private static ServerFrameworkService getFrameworkService(Configuration conf) { + return FrameworkUtils.get(ServerFrameworkService.class, conf, YarnServerFrameworkService.class); + } + protected void initClientRpcServer() { clientRpcServer = new DAGClientServer(clientHandler, appAttemptID, recoveryFS); addIfService(clientRpcServer, true); + + initAmRegistryCallbackForRecordAdd(); } - @VisibleForTesting - public static void initAmRegistry(ApplicationId appId, String amUUID, AMRegistry amRegistry, - DAGClientServer dagClientServer) { - if (amRegistry != null) { - dagClientServer.registerServiceListener((service) -> { - if (service.isInState(STATE.STARTED)) { - AMRecord amRecord = AMRegistryUtils.recordForDAGClientServer(appId, amUUID, dagClientServer); - try { - amRegistry.add(amRecord); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - }); + /** + * Initializes an AM registry callback when the clientRpcServer is already initialized + */ + private void initAmRegistryCallbackForRecordAdd() { + AMRegistry amRegistry = frameworkService.getAMRegistry(this.amConf); + if (amRegistry == null) { + return; } + if (clientRpcServer == null){ + throw new IllegalStateException( + "Client RPC Server has not been initialized before attempting to initialize an AM registry"); + } + + ApplicationId appId = appAttemptID.getApplicationId(); + + clientRpcServer.registerServiceListener((service) -> { + if (service.isInState(STATE.STARTED)) { + InetSocketAddress rpcServerAddress = clientRpcServer.getBindAddress(); + + final String computeName = System.getenv(ZkConfig.COMPUTE_GROUP_NAME_ENV); + AMRecord amRecord = amRegistry.createAmRecord(appId, 
rpcServerAddress.getHostName(), + rpcServerAddress.getAddress().getHostAddress(), rpcServerAddress.getPort(), computeName); + try { + amRegistry.add(amRecord); + LOG.info("Added AMRecord: {} to registry..", amRecord); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }); } @VisibleForTesting @@ -869,7 +884,7 @@ protected synchronized void handle(DAGAppMasterEvent event) { } else { LOG.info("Session shutting down now."); this.taskSchedulerManager.setShouldUnregisterFlag(); - if (this.historyEventHandler.hasRecoveryFailed()) { + if (recoveryEnabled && this.historyEventHandler.hasRecoveryFailed()) { state = DAGAppMasterState.FAILED; } else { state = DAGAppMasterState.SUCCEEDED; @@ -1771,6 +1786,11 @@ public String getQueueName() { public void setQueueName(String queueName) { this.queueName = queueName; } + + @Override + public AMExtensions getAmExtensions() { + return frameworkService.getAMExtensions(); + } } private String getShutdownTimeString() { @@ -1958,6 +1978,9 @@ void stopServices() { firstException = ex; } } + + Optional.ofNullable(frameworkService.getAMRegistry(this.amConf)).ifPresent(AMRegistry::close); + //after stopping all services, rethrow the first exception raised if (firstException != null) { throw ServiceStateException.convert(firstException); @@ -2031,16 +2054,18 @@ public void serviceStart() throws Exception { this.lastDAGCompletionTime = clock.getTime(); - DAGRecoveryData recoveredDAGData; - try { - recoveredDAGData = recoverDAG(); - } catch (IOException e) { - LOG.error("Error occurred when trying to recover data from previous attempt." - + " Shutting down AM", e); - this.state = DAGAppMasterState.ERROR; - this.taskSchedulerManager.setShouldUnregisterFlag(); - shutdownHandler.shutdown(); - return; + DAGRecoveryData recoveredDAGData = null; + if (recoveryEnabled) { + try { + recoveredDAGData = recoverDAG(); + } catch (IOException e) { + LOG.error("Error occurred when trying to recover data from previous attempt." 
+ + " Shutting down AM", e); + this.state = DAGAppMasterState.ERROR; + this.taskSchedulerManager.setShouldUnregisterFlag(); + shutdownHandler.shutdown(); + return; + } } DAGPlan dagPlan = null; @@ -2061,7 +2086,7 @@ public void serviceStart() throws Exception { this.state = DAGAppMasterState.IDLE; } - if (recoveredDAGData != null) { + if (recoveryEnabled && recoveredDAGData != null) { if (recoveredDAGData.cumulativeAdditionalResources != null) { recoveredDAGData.additionalUrlsForClasspath = processAdditionalResources( recoveredDAGData.recoveredDagID, @@ -2385,16 +2410,12 @@ public static void main(String[] args) { TezClassLoader.setupTezClassLoader(); Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); final String pid = System.getenv().get("JVM_PID"); - String containerIdStr = - System.getenv(Environment.CONTAINER_ID.name()); - String nodeHostString = System.getenv(Environment.NM_HOST.name()); - String nodePortString = System.getenv(Environment.NM_PORT.name()); - String nodeHttpPortString = - System.getenv(Environment.NM_HTTP_PORT.name()); - String appSubmitTimeStr = - System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV); + + String nodeHostString = System.getenv(ApplicationConstants.Environment.NM_HOST.name()); + String nodePortString = System.getenv(ApplicationConstants.Environment.NM_PORT.name()); + String nodeHttpPortString = System.getenv(ApplicationConstants.Environment.NM_HTTP_PORT.name()); + String appSubmitTimeStr = System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV); String clientVersion = System.getenv(TezConstants.TEZ_CLIENT_VERSION_ENV); - String amUUID = System.getenv(TezConstants.TEZ_AM_UUID); if (clientVersion == null) { clientVersion = VersionInfo.UNKNOWN; } @@ -2402,16 +2423,20 @@ public static void main(String[] args) { Objects.requireNonNull(appSubmitTimeStr, ApplicationConstants.APP_SUBMIT_TIME_ENV + " is null"); - ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); - ApplicationAttemptId 
applicationAttemptId = - containerId.getApplicationAttemptId(); + Configuration conf = new Configuration(); + + AMExtensions amExtensions = getFrameworkService(conf).getAMExtensions(); + DAGProtos.ConfigurationProto confProto = amExtensions.loadConfigurationProto(); + TezUtilsInternal.addUserSpecifiedTezConfiguration(conf, confProto.getConfKeyValuesList()); + + ContainerId containerId = amExtensions.allocateContainerId(conf); + + ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId(); org.apache.hadoop.ipc.CallerContext.setCurrent(new org.apache.hadoop.ipc.CallerContext .Builder("tez_appmaster_" + containerId.getApplicationAttemptId() ).build()); long appSubmitTime = Long.parseLong(appSubmitTimeStr); - - String jobUserName = System - .getenv(ApplicationConstants.Environment.USER.name()); + String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name()); // Command line options Option option = Option.builder() @@ -2431,15 +2456,9 @@ public static void main(String[] args) { + ", jvmPid=" + pid + ", userFromEnv=" + jobUserName + ", cliSessionOption=" + sessionModeCliOption - + ", pwd=" + System.getenv(Environment.PWD.name()) - + ", localDirs=" + System.getenv(Environment.LOCAL_DIRS.name()) - + ", logDirs=" + System.getenv(Environment.LOG_DIRS.name())); - - Configuration conf = new Configuration(); - - ConfigurationProto confProto = - TezUtilsInternal.readUserSpecifiedTezConfiguration(System.getenv(Environment.PWD.name())); - TezUtilsInternal.addUserSpecifiedTezConfiguration(conf, confProto.getConfKeyValuesList()); + + ", pwd=" + System.getenv(ApplicationConstants.Environment.PWD.name()) + + ", localDirs=" + System.getenv(ApplicationConstants.Environment.LOCAL_DIRS.name()) + + ", logDirs=" + System.getenv(ApplicationConstants.Environment.LOG_DIRS.name())); AMPluginDescriptorProto amPluginDescriptorProto = null; if (confProto.hasAmPluginDescriptor()) { @@ -2452,16 +2471,13 @@ public static void main(String[] args) { 
TezUtilsInternal.setSecurityUtilConfigration(LOG, conf); DAGAppMaster appMaster = - new DAGAppMaster(applicationAttemptId, containerId, nodeHostString, - Integer.parseInt(nodePortString), - Integer.parseInt(nodeHttpPortString), new SystemClock(), appSubmitTime, - sessionModeCliOption, - System.getenv(Environment.PWD.name()), - TezCommonUtils.getTrimmedStrings(System.getenv(Environment.LOCAL_DIRS.name())), - TezCommonUtils.getTrimmedStrings(System.getenv(Environment.LOG_DIRS.name())), - clientVersion, credentials, jobUserName, amPluginDescriptorProto, amUUID); - ShutdownHookManager.get().addShutdownHook( - new DAGAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY); + new DAGAppMaster(applicationAttemptId, containerId, nodeHostString, Integer.parseInt(nodePortString), + Integer.parseInt(nodeHttpPortString), new SystemClock(), appSubmitTime, sessionModeCliOption, + System.getenv(ApplicationConstants.Environment.PWD.name()), + TezCommonUtils.getTrimmedStrings(System.getenv(ApplicationConstants.Environment.LOCAL_DIRS.name())), + TezCommonUtils.getTrimmedStrings(System.getenv(ApplicationConstants.Environment.LOG_DIRS.name())), + clientVersion, credentials, jobUserName, amPluginDescriptorProto); + ShutdownHookManager.get().addShutdownHook(new DAGAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY); // log the system properties if (LOG.isInfoEnabled()) { diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java index 71eafd8965..e0c8443577 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/LocalDAGAppMaster.java @@ -37,7 +37,7 @@ public LocalDAGAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId Credentials credentials, String jobUserName, AMPluginDescriptorProto pluginDescriptorProto) { super(applicationAttemptId, containerId, nmHost, nmPort, nmHttpPort, clock, appSubmitTime, 
isSession, workingDirectory, localDirs, logDirs, clientVersion, credentials, jobUserName, - pluginDescriptorProto, null); + pluginDescriptorProto); } @Override diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/PluginManager.java b/tez-dag/src/main/java/org/apache/tez/dag/app/PluginManager.java index b9151bd4cc..1b1307deb3 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/PluginManager.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/PluginManager.java @@ -174,6 +174,11 @@ public static void parsePlugin(List resultList, for (TezNamedEntityDescriptorProto namedEntityDescriptorProto : namedEntityDescriptorProtos) { NamedEntityDescriptor descriptor = DagTypeConverters .convertNamedDescriptorFromProto(namedEntityDescriptorProto); + if (descriptor.getUserPayload() == null) { + //If custom-plugin descriptor includes no payload, include the defaultPayload + //Useful in providing Configuration payload for hand-written JSON descriptors + descriptor.setUserPayload(defaultPayload); + } addDescriptor(resultList, pluginMap, descriptor); } } diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java index e86624db78..8ce4f6afa8 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/DAGImpl.java @@ -1612,19 +1612,14 @@ DAGState initializeDAG() { addVertex(v); } - // check task resources, only check it in non-local mode if (!appContext.isLocal()) { - for (Vertex v : vertexMap.values()) { - // TODO TEZ-2003 (post) TEZ-2624 Ideally, this should be per source. 
- if (v.getTaskResource().compareTo(appContext.getClusterInfo().getMaxContainerCapability()) > 0) { - String msg = "Vertex's TaskResource is beyond the cluster container capability," + - "Vertex=" + v.getLogIdentifier() +", Requested TaskResource=" + v.getTaskResource() - + ", Cluster MaxContainerCapability=" + appContext.getClusterInfo().getMaxContainerCapability(); - LOG.error(msg); - addDiagnostic(msg); - finished(DAGState.FAILED); - return DAGState.FAILED; - } + try { + appContext.getAmExtensions().checkTaskResources(vertexMap, appContext.getClusterInfo()); + } catch (Exception e) { + LOG.error(e.getMessage()); + addDiagnostic(e.getMessage()); + finished(DAGState.FAILED); + return DAGState.FAILED; } } @@ -1769,7 +1764,7 @@ private static void parseVertexEdges(DAGImpl dag, Map edgePlan Map outVertices = new HashMap(); - for(String inEdgeId : vertexPlan.getInEdgeIdList()){ + for (String inEdgeId : vertexPlan.getInEdgeIdList()) { EdgePlan edgePlan = edgePlans.get(inEdgeId); Vertex inVertex = dag.vertexMap.get(edgePlan.getInputVertexName()); Edge edge = dag.edges.get(inEdgeId); @@ -1778,7 +1773,7 @@ private static void parseVertexEdges(DAGImpl dag, Map edgePlan inVertices.put(inVertex, edge); } - for(String outEdgeId : vertexPlan.getOutEdgeIdList()){ + for (String outEdgeId : vertexPlan.getOutEdgeIdList()) { EdgePlan edgePlan = edgePlans.get(outEdgeId); Vertex outVertex = dag.vertexMap.get(edgePlan.getOutputVertexName()); Edge edge = dag.edges.get(outEdgeId); @@ -1803,7 +1798,7 @@ private static void parseVertexEdges(DAGImpl dag, Map edgePlan *
    *
  • * 1. For the completed dag, recover the dag to the desired state and also its vertices, - * but not task & task attempt. This recovery is sync call (after this Transition, + * but not task & task attempt. This recovery is sync call (after this Transition, * DAG & vertices are all recovered to the desired state) *
  • *
  • diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java index 3a28db07cf..def53ef1c0 100644 --- a/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java +++ b/tez-dag/src/main/java/org/apache/tez/dag/app/dag/impl/TaskAttemptImpl.java @@ -132,7 +132,7 @@ public class TaskAttemptImpl implements TaskAttempt, private static final Logger LOG = LoggerFactory.getLogger(TaskAttemptImpl.class); private static final String LINE_SEPARATOR = System .getProperty("line.separator"); - + public static class DataEventDependencyInfo { long timestamp; TezTaskAttemptID taId; @@ -576,7 +576,6 @@ public TaskAttemptImpl(TezTaskAttemptID attemptId, EventHandler eventHandler, this.hungIntervalMax = conf.getLong( TezConfiguration.TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS, TezConfiguration.TEZ_TASK_PROGRESS_STUCK_INTERVAL_MS_DEFAULT); - this.recoveryData = appContext.getDAGRecoveryData() == null ? null : appContext.getDAGRecoveryData().getTaskAttemptRecoveryData(attemptId); } @@ -1267,7 +1266,7 @@ public TaskAttemptStateInternal transition(TaskAttemptImpl ta, TaskAttemptEvent return TaskAttemptStateInternal.NEW; } } - // No matter whether TaskAttemptStartedEvent is seen, send corresponding event to move + // No matter whether TaskAttemptStartedEvent is seen, send corresponding event to move // TA to the state of TaskAttemptFinishedEvent TaskAttemptFinishedEvent taFinishedEvent = ta.recoveryData.getTaskAttemptFinishedEvent(); diff --git a/tez-dag/src/main/java/org/apache/tez/dag/utils/AMRegistryUtils.java b/tez-dag/src/main/java/org/apache/tez/dag/utils/AMRegistryUtils.java deleted file mode 100644 index 13cc27cbac..0000000000 --- a/tez-dag/src/main/java/org/apache/tez/dag/utils/AMRegistryUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tez.dag.utils; - -import java.net.InetSocketAddress; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.tez.client.registry.AMRecord; -import org.apache.tez.common.ReflectionUtils; -import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.dag.api.client.DAGClientServer; -import org.apache.tez.dag.api.client.registry.AMRegistry; - -public final class AMRegistryUtils { - - private AMRegistryUtils() {} - - public static AMRecord recordForDAGClientServer(ApplicationId appId, String opaqueId, - DAGClientServer dagClientServer) { - InetSocketAddress address = dagClientServer.getBindAddress(); - return new AMRecord(appId, address.getHostName(), address.getPort(), opaqueId); - } - - public static AMRegistry createAMRegistry(Configuration conf) throws Exception { - String tezAMRegistryClass = conf.get(TezConfiguration.TEZ_AM_REGISTRY_CLASS); - return tezAMRegistryClass == null ? 
null : ReflectionUtils.createClazzInstance(tezAMRegistryClass); - } -} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/AMExtensions.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/AMExtensions.java new file mode 100644 index 0000000000..a758dfe01d --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/AMExtensions.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.tez.common.security.JobTokenIdentifier; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.dag.api.records.DAGProtos; +import org.apache.tez.dag.app.ClusterInfo; +import org.apache.tez.dag.app.dag.Vertex; + +/** + * Extension points for customizing AM behavior. + * + *

    These hooks allow injecting alternate or additional logic into the + * Application Master without requiring a standalone service. They are + * intended for behaviors that are too small or cross-cutting to justify + * a dedicated service.

    + */ +public interface AMExtensions { + + /** + * Override the default configuration loading performed in + * {@code DAGAppMaster.main(...)}. + * + * @return a {@link DAGProtos.ConfigurationProto} representing the final configuration + * @throws IOException if configuration loading fails + */ + DAGProtos.ConfigurationProto loadConfigurationProto() throws IOException; + + /** + * Override the default logic used to assign a {@link ContainerId} to the AM. + * + * @param conf the Tez configuration + * @return the allocated {@link ContainerId} + */ + ContainerId allocateContainerId(Configuration conf); + + /** + * Validate resource constraints for tasks before execution. + * + * @param vertices mapping of vertex names to their DAG vertices + * @param clusterInfo cluster resource information + * @throws Exception if resource requirements cannot be satisfied + */ + void checkTaskResources(Map vertices, ClusterInfo clusterInfo) throws Exception; + + /** + * Create or override the session token used for AM authentication. + * + * @param appAttemptID current application attempt ID + * @param jobTokenSecretManager token secret manager + * @param amCredentials AM credentials store + * @return the session token + */ + Token getSessionToken( + ApplicationAttemptId appAttemptID, + JobTokenSecretManager jobTokenSecretManager, + Credentials amCredentials + ); + + /** + * Provide additional local resources required for the AM session. 
+ * + * @param workingDirectory the AM working directory + * @return protocol buffers describing local session resources + * @throws IOException if resources cannot be discovered or packaged + */ + DAGProtos.PlanLocalResourcesProto getAdditionalSessionResources(String workingDirectory) throws IOException; +} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/ServerFrameworkService.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/ServerFrameworkService.java new file mode 100644 index 0000000000..5b19130a53 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/ServerFrameworkService.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins; + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.client.registry.AMRegistry; + +/** + * A {@code FrameworkService} that runs inside the Application Master (AM) process. + * + *

    This service bundles together an {@code AMRegistry} and an + * {@code AMExtensions} implementation that are designed to be compatible and + * work together within the AM lifecycle.

    + */ +public interface ServerFrameworkService extends FrameworkService { + AMRegistry getAMRegistry(Configuration conf); + AMExtensions getAMExtensions(); +} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/package-info.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/package-info.java new file mode 100644 index 0000000000..8d75aa3614 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Provides basic classes of framework plugins for Apache Tez. + */ +package org.apache.tez.frameworkplugins; diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnServerFrameworkService.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnServerFrameworkService.java new file mode 100644 index 0000000000..40d6eb1d9c --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/YarnServerFrameworkService.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins.yarn; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.tez.client.registry.AMRegistry; +import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.security.JobTokenIdentifier; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.common.security.TokenCache; +import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.records.DAGProtos; +import org.apache.tez.dag.app.ClusterInfo; +import org.apache.tez.dag.app.dag.Vertex; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.ServerFrameworkService; + +/** + * YARN-based server framework service implementation. + * Provides default YARN framework server functionality with default implementations + * for all AMExtensions methods. 
+ */ +public class YarnServerFrameworkService implements ServerFrameworkService { + + private final YarnAMExtensions amExtensions = new YarnAMExtensions(); + + @Override + public AMRegistry getAMRegistry(Configuration conf) { + // YARN mode doesn't require a custom AM registry + return null; + } + + @Override + public AMExtensions getAMExtensions() { + return amExtensions; + } + + /** + * Default YARN implementation of AMExtensions. + * Provides sensible defaults for all methods. + */ + public static class YarnAMExtensions implements AMExtensions { + + @Override + public DAGProtos.ConfigurationProto loadConfigurationProto() throws IOException { + return TezUtilsInternal + .readUserSpecifiedTezConfiguration(System.getenv(ApplicationConstants.Environment.PWD.name())); + } + + @Override + public ContainerId allocateContainerId(Configuration conf) { + String containerIdStr = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()); + return ContainerId.fromString(containerIdStr); + } + + @Override + public void checkTaskResources(Map vertices, ClusterInfo clusterInfo) throws Exception { + Resource maxContainerCapability = clusterInfo.getMaxContainerCapability(); + for (Vertex v : vertices.values()) { + // TODO TEZ-2003 (post) TEZ-2624 Ideally, this should be per source. 
+ if (v.getTaskResource().compareTo(maxContainerCapability) > 0) { + String msg = "Vertex's TaskResource is beyond the cluster container capability," + + "Vertex=" + v.getLogIdentifier() +", Requested TaskResource=" + v.getTaskResource() + + ", Cluster MaxContainerCapability=" + maxContainerCapability; + throw new Exception(msg); + } + } + } + + @Override + public Token getSessionToken( + ApplicationAttemptId appAttemptID, + JobTokenSecretManager jobTokenSecretManager, + Credentials amCredentials) { + return TokenCache.getSessionToken(amCredentials); + } + + @Override + public DAGProtos.PlanLocalResourcesProto getAdditionalSessionResources(String workingDirectory) throws IOException { + try (FileInputStream sessionResourcesStream = new FileInputStream( + new File(workingDirectory, TezConstants.TEZ_AM_LOCAL_RESOURCES_PB_FILE_NAME))) { + return DAGProtos.PlanLocalResourcesProto.parseDelimitedFrom(sessionResourcesStream); + } + } + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java new file mode 100644 index 0000000000..cb50990e68 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/yarn/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Provides YARN-specific framework plugins for Apache Tez. + */ +package org.apache.tez.frameworkplugins.yarn; diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneAMExtensions.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneAMExtensions.java new file mode 100644 index 0000000000..6f95bcfb56 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneAMExtensions.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins.zookeeper; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.tez.client.registry.AMRegistry; +import org.apache.tez.common.TezUtilsInternal; +import org.apache.tez.common.security.JobTokenIdentifier; +import org.apache.tez.common.security.JobTokenSecretManager; +import org.apache.tez.common.security.TokenCache; +import org.apache.tez.dag.api.records.DAGProtos; +import org.apache.tez.dag.app.ClusterInfo; +import org.apache.tez.dag.app.dag.Vertex; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.ServerFrameworkService; + +public class ZkStandaloneAMExtensions implements AMExtensions { + + private final ServerFrameworkService frameworkService; + + public ZkStandaloneAMExtensions(ServerFrameworkService frameworkService) { + this.frameworkService = frameworkService; + } + + @Override + public ContainerId allocateContainerId(Configuration conf) { + try { + AMRegistry amRegistry = frameworkService.getAMRegistry(conf); + if (amRegistry != null) { + ApplicationId appId = amRegistry.generateNewId(); + // attemptId is set to 1 because only then does the APP_LAUNCHED event get triggered + ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(appId, 1); + return
ContainerId.newContainerId(applicationAttemptId, 0); + } else { + throw new IllegalStateException("AMRegistry must not be null for standalone AM mode"); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void checkTaskResources(Map vertices, ClusterInfo clusterInfo) { + // no-op: the Zookeeper-based framework does not currently enforce task resources + } + + @Override + public DAGProtos.ConfigurationProto loadConfigurationProto() throws IOException { + return TezUtilsInternal.loadConfProtoFromText(); + } + + @Override + public Token getSessionToken(ApplicationAttemptId appAttemptID, + JobTokenSecretManager jobTokenSecretManager, Credentials amCredentials) { + JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(appAttemptID.getApplicationId().toString())); + Token newSessionToken = new Token<>(identifier, jobTokenSecretManager); + newSessionToken.setService(identifier.getJobId()); + TokenCache.setSessionToken(newSessionToken, amCredentials); + return newSessionToken; + } + + @Override + public DAGProtos.PlanLocalResourcesProto getAdditionalSessionResources(String workingDirectory) { + return DAGProtos.PlanLocalResourcesProto.getDefaultInstance(); + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneServerFrameworkService.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneServerFrameworkService.java new file mode 100644 index 0000000000..12c62640ac --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/ZkStandaloneServerFrameworkService.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tez.frameworkplugins.zookeeper; + + +import org.apache.hadoop.conf.Configuration; +import org.apache.tez.client.registry.AMRegistry; +import org.apache.tez.dag.api.TezConstants; +import org.apache.tez.dag.api.client.registry.zookeeper.ZkAMRegistry; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.ServerFrameworkService; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ZkStandaloneServerFrameworkService implements ServerFrameworkService { + private static final Logger LOG = LoggerFactory.getLogger(ZkStandaloneServerFrameworkService.class); + private final ZkStandaloneAMExtensions amExtensions = new ZkStandaloneAMExtensions(this); + private volatile ZkAMRegistry amRegistry; + + /** + * Returns a singleton {@link AMRegistry} instance backed by ZooKeeper. + * + *

    <p>If the registry has not yet been created, this method initializes and starts + * a new {@link ZkAMRegistry} using the external AM identifier obtained from the + * {@code TEZ_AM_EXTERNAL_ID} environment variable.</p> + * + * <p>When the registry is used as a service within the DAGAppMaster, the + * DAGAppMaster is responsible for managing its lifecycle, including closure.</p>

    + * + * @param conf the configuration used to initialize the registry; must not be null + * @return the initialized and started {@link AMRegistry} instance + * @throws IllegalStateException if the {@code TEZ_AM_EXTERNAL_ID} environment variable is not set + * @throws RuntimeException if an error occurs while creating, initializing, or starting the registry + */ + @Override + public synchronized AMRegistry getAMRegistry(Configuration conf) { + if (amRegistry == null) { + final String externalId = System.getenv(TezConstants.TEZ_AM_EXTERNAL_ID); + if (externalId == null) { + throw new IllegalStateException( + TezConstants.TEZ_AM_EXTERNAL_ID + " environment variable is not set for standalone AM"); + } + try { + amRegistry = new ZkAMRegistry(externalId); + amRegistry.init(conf); + amRegistry.start(); + LOG.info("Created Zookeeper based AM Registry with externalId: {}", externalId); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + return amRegistry; + } + + @Override + public AMExtensions getAMExtensions() { + return amExtensions; + } +} diff --git a/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java new file mode 100644 index 0000000000..c3ddc3f7a7 --- /dev/null +++ b/tez-dag/src/main/java/org/apache/tez/frameworkplugins/zookeeper/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Provides Zookeeper-specific framework plugins for Apache Tez. + */ +package org.apache.tez.frameworkplugins.zookeeper; diff --git a/tez-dag/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkAMRegistryClient.java b/tez-dag/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkAMRegistryClient.java new file mode 100644 index 0000000000..bbeee6b3c2 --- /dev/null +++ b/tez-dag/src/test/java/org/apache/tez/client/registry/zookeeper/TestZkAMRegistryClient.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.client.registry.zookeeper; + + +import static org.apache.tez.frameworkplugins.FrameworkMode.STANDALONE_ZOOKEEPER; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.io.IOUtils; +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.client.registry.AMRegistryClientListener; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.dag.app.DAGAppMaster; +import org.apache.tez.dag.app.MockDAGAppMaster; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit tests for {@link ZkAMRegistryClient}. + *

    <p> + * This test class validates the ZooKeeper-based AM (Application Master) registry and discovery + * mechanism. It tests that when a DAGAppMaster is started with STANDALONE_ZOOKEEPER framework mode, + * it properly registers itself to ZooKeeper and can be discovered by a {@link ZkAMRegistryClient}. + * </p> + * <p> + * The tests use an embedded ZooKeeper {@link TestingServer} to avoid external dependencies + * and ensure test isolation. + * </p>

    + */ +public class TestZkAMRegistryClient { + private static final Logger LOG = LoggerFactory.getLogger(TestZkAMRegistryClient.class); + private static final File TEST_DIR = new File(System.getProperty("test.build.data", "target"), + TestZkAMRegistryClient.class.getName()).getAbsoluteFile(); + + /** + * Embedded ZooKeeper server for testing. Uses Apache Curator's {@link TestingServer} + * to provide an in-memory ZooKeeper instance. + */ + private TestingServer zkServer; + + /** + * ZooKeeper-based AM registry client used to discover and retrieve AM records. + */ + private ZkAMRegistryClient registryClient; + + /** + * Mock DAGAppMaster instance that registers itself to the ZooKeeper registry. + */ + private DAGAppMaster dagAppMaster; + + @Before + public void setup() throws Exception { + zkServer = new TestingServer(); + zkServer.start(); + LOG.info("Started ZooKeeper test server on port: {}", zkServer.getPort()); + } + + @After + public void teardown() throws Exception { + if (dagAppMaster != null) { + dagAppMaster.stop(); + } + IOUtils.closeQuietly(registryClient); + IOUtils.closeQuietly(zkServer); + } + + /** + * Tests the complete ZooKeeper-based AM registry and discovery flow. + *

    <p> + * This test validates the following workflow: + * </p> + * <ol> + * <li>Configure Tez with STANDALONE_ZOOKEEPER framework mode</li> + * <li>Create and start a {@link ZkAMRegistryClient} with an event listener</li> + * <li>Start a {@link MockDAGAppMaster} which registers itself to ZooKeeper</li> + * <li>Verify that the registry client's listener is notified of the AM registration</li> + * <li>Verify the AM record can be retrieved via {@link ZkAMRegistryClient#getRecord(ApplicationId)}</li> + * <li>Verify the AM appears in the list from {@link ZkAMRegistryClient#getAllRecords()}</li> + * <li>Validate all expected fields (host, port, applicationId) are correctly set</li> + * </ol> + * <p> + * The test uses a {@link CountDownLatch} to synchronize between the AM registration + * event and the test assertions, ensuring the AM has fully registered before validation. + * </p>

    + * + * @throws Exception if any part of the test fails + */ + @Test(timeout = 10000) + public void testZkAmRegistryDiscovery() throws Exception { + TezConfiguration tezConf = getTezConfForZkDiscovery(); + + ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); + ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1); + ContainerId containerId = ContainerId.newContainerId(attemptId, 1); + + CountDownLatch amRegisteredLatch = new CountDownLatch(1); + AtomicBoolean amDiscovered = new AtomicBoolean(false); + + // Create and start the ZkAMRegistryClient + registryClient = ZkAMRegistryClient.getClient(tezConf); + registryClient.addListener(new AMRegistryClientListener() { + @Override + public void onAdd(AMRecord amRecord) { + LOG.info("AM added to registry: {}", amRecord); + if (amRecord.getApplicationId().equals(appId)) { + amDiscovered.set(true); + amRegisteredLatch.countDown(); + } + } + + @Override + public void onRemove(AMRecord amRecord) { + LOG.info("AM removed from registry: {}", amRecord); + } + }); + registryClient.start(); + + String workingDir = TEST_DIR.toString(); + String[] localDirs = new String[]{TEST_DIR.toString()}; + String[] logDirs = new String[]{TEST_DIR + "/logs"}; + String jobUserName = UserGroupInformation.getCurrentUser().getShortUserName(); + + dagAppMaster = new MockDAGAppMaster(attemptId, containerId, "localhost", 0, 0, SystemClock.getInstance(), + System.currentTimeMillis(), true, workingDir, localDirs, logDirs, new AtomicBoolean(true), false, false, + new Credentials(), jobUserName, 1, 1); + + dagAppMaster.init(tezConf); + dagAppMaster.start(); + + // Wait for AM to be registered in ZooKeeper + boolean registered = amRegisteredLatch.await(30, TimeUnit.SECONDS); + assertTrue("AM was not registered in ZooKeeper within timeout", registered); + assertTrue("AM was not discovered by registry client", amDiscovered.get()); + + // Verify the AM record is available through the registry client + 
AMRecord amRecord = registryClient.getRecord(appId); + assertNotNull("AM record should be retrievable from registry", amRecord); + assertEquals("Application ID should match", appId, amRecord.getApplicationId()); + assertNotNull("Host should be set", amRecord.getHostName()); + assertTrue("Port should be positive", amRecord.getPort() > 0); + + // Verify getAllRecords also returns the AM + List allRecords = registryClient.getAllRecords(); + assertNotNull("getAllRecords should not return null", allRecords); + assertFalse("getAllRecords should contain at least one record", allRecords.isEmpty()); + + boolean found = false; + for (AMRecord record : allRecords) { + if (record.getApplicationId().equals(appId)) { + found = true; + break; + } + } + assertTrue("AM record should be in getAllRecords", found); + } + + private TezConfiguration getTezConfForZkDiscovery() { + TezConfiguration tezConf = new TezConfiguration(); + tezConf.set(TezConfiguration.TEZ_FRAMEWORK_MODE, STANDALONE_ZOOKEEPER.name()); + tezConf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:" + zkServer.getPort()); + tezConf.setBoolean(TezConfiguration.TEZ_LOCAL_MODE, true); + tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, TEST_DIR.toString()); + return tezConf; + } +} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/TestAMRegistry.java b/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/TestAMRegistry.java deleted file mode 100644 index dc8cc4acf7..0000000000 --- a/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/TestAMRegistry.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tez.dag.api.client.registry; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.net.InetSocketAddress; -import java.util.UUID; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.tez.client.registry.AMRecord; -import org.apache.tez.dag.api.TezConfiguration; -import org.apache.tez.dag.api.client.DAGClientHandler; -import org.apache.tez.dag.api.client.DAGClientServer; -import org.apache.tez.dag.app.DAGAppMaster; -import org.apache.tez.dag.utils.AMRegistryUtils; - -import org.junit.Test; - -public class TestAMRegistry { - - @Test(timeout = 5000) - public void testAMRegistryFactory() throws Exception { - Configuration conf = new Configuration(); - AMRegistry amRegistry = AMRegistryUtils.createAMRegistry(conf); - assertNull(amRegistry); - String className = SkeletonAMRegistry.class.getName(); - conf.set(TezConfiguration.TEZ_AM_REGISTRY_CLASS, className); - amRegistry = AMRegistryUtils.createAMRegistry(conf); - assertNotNull(amRegistry); - assertEquals(className, amRegistry.getClass().getName()); - } - - @Test(timeout = 5000) - public void testRecordForDagServer() { - DAGClientServer 
dagClientServer = mock(DAGClientServer.class); - when(dagClientServer.getBindAddress()).thenReturn(new InetSocketAddress("testhost", 1000)); - ApplicationId appId = ApplicationId.newInstance(0, 1); - String id = UUID.randomUUID().toString(); - AMRecord record = AMRegistryUtils.recordForDAGClientServer(appId, id, dagClientServer); - assertEquals(appId, record.getApplicationId()); - assertEquals("testhost", record.getHost()); - assertEquals(1000, record.getPort()); - assertEquals(record.getId(), id); - } - - @Test(timeout = 20000) - public void testAMRegistryService() throws Exception { - DAGClientHandler dagClientHandler = mock(DAGClientHandler.class); - ApplicationAttemptId appAttemptId = mock(ApplicationAttemptId.class); - ApplicationId appId = ApplicationId.newInstance(0, 1); - String uuid = UUID.randomUUID().toString(); - when(appAttemptId.getApplicationId()).thenReturn(appId); - AMRegistry amRegistry = mock(AMRegistry.class); - FileSystem fs = mock(FileSystem.class); - DAGClientServer dagClientServer = new DAGClientServer(dagClientHandler, appAttemptId, fs); - try { - DAGAppMaster.initAmRegistry(appAttemptId.getApplicationId(), uuid, amRegistry, dagClientServer); - dagClientServer.init(new Configuration()); - dagClientServer.start(); - AMRecord record = AMRegistryUtils.recordForDAGClientServer(appId, uuid, dagClientServer); - verify(amRegistry, times(1)).add(record); - } finally { - dagClientServer.stop(); - } - } - - public static class SkeletonAMRegistry extends AMRegistry { - public SkeletonAMRegistry() { - super("SkeletonAMRegistry"); - } - @Override public void add(AMRecord server) throws Exception { } - @Override public void remove(AMRecord server) throws Exception { } - } -} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/zookeeper/TestZkAMRegistry.java b/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/zookeeper/TestZkAMRegistry.java new file mode 100644 index 0000000000..cf3b9872e0 --- /dev/null +++ 
b/tez-dag/src/test/java/org/apache/tez/dag/api/client/registry/zookeeper/TestZkAMRegistry.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tez.dag.api.client.registry.zookeeper; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.tez.client.registry.AMRecord; +import org.apache.tez.client.registry.AMRegistryUtils; +import org.apache.tez.client.registry.zookeeper.ZkConfig; +import org.apache.tez.dag.api.TezConfiguration; + +import org.junit.After; +import 
org.junit.Before; +import org.junit.Test; + +/** + * Unit tests for {@link ZkAMRegistry}. + * + *

    <p>This test class focuses on the low-level AM registry implementation that runs + * inside the AM process. It validates that:</p> + * <ul> + * <li>Unique {@link ApplicationId}s are generated and persisted in ZooKeeper.</li> + * <li>{@link AMRecord}s are written to and removed from ZooKeeper at the expected paths.</li> + * </ul>
    + */ +public class TestZkAMRegistry { + + private TestingServer zkServer; + + @Before + public void setup() throws Exception { + zkServer = new TestingServer(); + zkServer.start(); + } + + @After + public void teardown() throws Exception { + if (zkServer != null) { + zkServer.close(); + } + } + + @Test + public void testGenerateNewIdProducesUniqueIds() throws Exception { + TezConfiguration conf = createTezConf(); + try (ZkAMRegistry registry = new ZkAMRegistry("external-id")) { + registry.init(conf); + registry.start(); + + ApplicationId first = registry.generateNewId(); + ApplicationId second = registry.generateNewId(); + + assertNotNull(first); + assertNotNull(second); + assertEquals("Cluster timestamps should match", first.getClusterTimestamp(), second.getClusterTimestamp()); + assertEquals("Second id should be first id + 1", first.getId() + 1, second.getId()); + } + } + + @Test(timeout = 120000) + public void testGenerateNewIdFromParallelThreads() throws Exception { + final int threadCount = 50; + + TezConfiguration conf = createTezConf(); + // this is the maxRetries for ExponentialBackoffRetry, let's use it to be able to test high concurrency + conf.setInt(TezConfiguration.TEZ_AM_CURATOR_MAX_RETRIES, 29); + + try (ZkAMRegistry registry = new ZkAMRegistry("external-id")) { + registry.init(conf); + registry.start(); + + ExecutorService executor = Executors.newFixedThreadPool(threadCount); + CountDownLatch startLatch = new CountDownLatch(1); + CountDownLatch doneLatch = new CountDownLatch(threadCount); + + Set ids = Collections.synchronizedSet(new HashSet<>()); + + List> asyncTasks = new ArrayList<>(); + + for (int i = 0; i < threadCount; i++) { + asyncTasks.add(CompletableFuture.runAsync(() -> { + try { + // Ensure all threads start generateNewId as simultaneously as possible + startLatch.await(); + ApplicationId id = registry.generateNewId(); + assertNotNull(id); + ids.add(id); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + 
doneLatch.countDown(); + } + }, executor)); + } + + // release all threads + startLatch.countDown(); + + // run the tasks + try { + CompletableFuture.allOf(asyncTasks.toArray(new CompletableFuture[0])).get(); + } catch (ExecutionException e) { // ExecutionException wraps the original exception + throw new RuntimeException(e.getCause()); + } finally { + executor.shutdown(); + } + assertEquals(String.format("All generated ids should be unique, ids found: %s", ids), threadCount, ids.size()); + + // additionally ensure cluster timestamp is the same for all IDs + long clusterTs = ids.iterator().next().getClusterTimestamp(); + for (ApplicationId id : ids) { + assertEquals("Cluster timestamps should match for all generated ids", clusterTs, id.getClusterTimestamp()); + } + } + } + + @Test + public void testAddAndRemoveAmRecordUpdatesZooKeeper() throws Exception { + TezConfiguration conf = createTezConf(); + + // Use a separate ZkConfig/Curator to inspect ZooKeeper state + ZkConfig zkConfig = new ZkConfig(conf); + + try (ZkAMRegistry registry = new ZkAMRegistry("external-id"); + CuratorFramework checkClient = zkConfig.createCuratorFramework()) { + registry.init(conf); + registry.start(); + + checkClient.start(); + + ApplicationId appId = registry.generateNewId(); + AMRecord record = registry.createAmRecord( + appId, "localhost", "127.0.0.1", 10000, "default-compute"); + + // Add record and verify node contents + registry.add(record); + + String path = zkConfig.getZkNamespace() + "/" + appId.toString(); + byte[] data = checkClient.getData().forPath(path); + + assertNotNull("Data should be written to ZooKeeper for AMRecord", data); + String json = new String(data, StandardCharsets.UTF_8); + String expectedJson = AMRegistryUtils.recordToJsonString(record); + assertEquals("Stored AMRecord JSON should match expected", expectedJson, json); + + // Remove record and ensure node is deleted + registry.remove(record); + assertNull("Node should be removed from ZooKeeper after 
remove()", checkClient.checkExists().forPath(path)); + } + } + + private TezConfiguration createTezConf() { + TezConfiguration conf = new TezConfiguration(); + conf.set(TezConfiguration.TEZ_AM_ZOOKEEPER_QUORUM, "localhost:" + zkServer.getPort()); + conf.set(TezConfiguration.TEZ_AM_REGISTRY_NAMESPACE, "/test-namespace"); + return conf; + } +} diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java index 213d85b892..fbab519376 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/MockDAGAppMaster.java @@ -500,7 +500,7 @@ public MockDAGAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId c Credentials credentials, String jobUserName, int handlerConcurrency, int numConcurrentContainers) { super(applicationAttemptId, containerId, nmHost, nmPort, nmHttpPort, clock, appSubmitTime, isSession, workingDirectory, localDirs, logDirs, new TezApiVersionInfo().getVersion(), - credentials, jobUserName, null, null); + credentials, jobUserName, null); shutdownHandler = new MockDAGAppMasterShutdownHandler(); this.launcherGoFlag = launcherGoFlag; this.initFailFlag = initFailFlag; diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java b/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java index afe2e8bc3b..0bc362db09 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/TestDAGAppMaster.java @@ -498,7 +498,7 @@ public void testBadProgress() throws Exception { TEST_DIR.toString(), new String[] {TEST_DIR.toString()}, new String[] {TEST_DIR.toString()}, new TezApiVersionInfo().getVersion(), amCreds, - "someuser", null, null)); + "someuser", null)); when(am.getState()).thenReturn(DAGAppMasterState.RUNNING); am.init(conf); am.start(); @@ -583,7 +583,7 @@ private void testDagCredentials(boolean 
doMerge) throws IOException { TEST_DIR.toString(), new String[] {TEST_DIR.toString()}, new String[] {TEST_DIR.toString()}, new TezApiVersionInfo().getVersion(), amCreds, - "someuser", null, null); + "someuser", null); am.init(conf); am.start(); @@ -704,7 +704,7 @@ public DAGAppMasterForTest(ApplicationAttemptId attemptId, boolean isSession) { super(attemptId, ContainerId.newContainerId(attemptId, 1), "hostname", 12345, 12346, new SystemClock(), 0, isSession, TEST_DIR.getAbsolutePath(), new String[] { TEST_DIR.getAbsolutePath() }, new String[] { TEST_DIR.getAbsolutePath() }, - new TezDagVersionInfo().getVersion(), createCredentials(), "jobname", null, null); + new TezDagVersionInfo().getVersion(), createCredentials(), "jobname", null); } public static Credentials createCredentials() { diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java index 953b473ab9..863abffc26 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestCommit.java @@ -105,6 +105,8 @@ import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.yarn.YarnServerFrameworkService; import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.runtime.api.OutputCommitter; import org.apache.tez.runtime.api.OutputCommitterContext; @@ -155,6 +157,7 @@ public class TestCommit { private ExecutorService rawExecutor; private ListeningExecutorService execService; + private AMExtensions amExtensions = new YarnServerFrameworkService.YarnAMExtensions(); private class DagEventDispatcher implements EventHandler { @Override @@ -306,6 +309,7 @@ public void setupDAG(DAGPlan dagPlan) { fsTokens = new Credentials(); appContext = 
mock(AppContext.class); when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim()); + when(appContext.getAmExtensions()).thenReturn(amExtensions); rawExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("App Shared Pool - " + "#%d").build()); execService = MoreExecutors.listeningDecorator(rawExecutor); diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java index c0bac853c1..92d8a15d9e 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGImpl.java @@ -140,6 +140,8 @@ import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; +import org.apache.tez.frameworkplugins.AMExtensions; +import org.apache.tez.frameworkplugins.yarn.YarnServerFrameworkService; import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.hadoop.shim.HadoopShim; import org.apache.tez.runtime.api.OutputCommitter; @@ -210,6 +212,7 @@ public class TestDAGImpl { private TaskAttemptEventDispatcher taskAttemptEventDispatcher; private ClusterInfo clusterInfo = new ClusterInfo(Resource.newInstance(8192,10)); private HadoopShim defaultShim = new DefaultHadoopShim(); + private AMExtensions amExtensions = new YarnServerFrameworkService.YarnAMExtensions(); static { Limits.reset(); @@ -872,6 +875,7 @@ public void setup() { final ListenableFuture mockFuture = mock(ListenableFuture.class); when(appContext.getHadoopShim()).thenReturn(defaultShim); when(appContext.getApplicationID()).thenReturn(appAttemptId.getApplicationId()); + doReturn(amExtensions).when(appContext).getAmExtensions(); doAnswer(new Answer() { public ListenableFuture answer(InvocationOnMock invocation) { @@ -902,6 +906,7 @@ public ListenableFuture answer(InvocationOnMock invocation) { 
doReturn(aclManager).when(mrrAppContext).getAMACLManager(); doReturn(execService).when(mrrAppContext).getExecService(); doReturn(defaultShim).when(mrrAppContext).getHadoopShim(); + doReturn(amExtensions).when(mrrAppContext).getAmExtensions(); mrrDagId = TezDAGID.getInstance(appAttemptId.getApplicationId(), 2); mrrDagPlan = createTestMRRDAGPlan(); @@ -935,6 +940,7 @@ public ListenableFuture answer(InvocationOnMock invocation) { .when(groupAppContext).getApplicationID(); doReturn(historyEventHandler).when(groupAppContext).getHistoryHandler(); doReturn(clusterInfo).when(groupAppContext).getClusterInfo(); + doReturn(amExtensions).when(groupAppContext).getAmExtensions(); // reset totalCommitCounter to 0 TotalCountingOutputCommitter.totalCommitCounter = 0; @@ -1005,6 +1011,7 @@ private void setupDAGWithCustomEdge(ExceptionLocation exLocation, boolean useLeg doReturn(appAttemptId.getApplicationId()).when(dagWithCustomEdgeAppContext).getApplicationID(); doReturn(historyEventHandler).when(dagWithCustomEdgeAppContext).getHistoryHandler(); doReturn(clusterInfo).when(dagWithCustomEdgeAppContext).getClusterInfo(); + doReturn(amExtensions).when(dagWithCustomEdgeAppContext).getAmExtensions(); dispatcher.register(TaskAttemptEventType.class, new TaskAttemptEventDisptacher2()); dispatcher.register(AMSchedulerEventType.class, new AMSchedulerEventHandler()); when(dagWithCustomEdgeAppContext.getContainerLauncherName(anyInt())).thenReturn( diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java index ee3684ae03..8b82a6aa3e 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestDAGRecovery.java @@ -116,6 +116,7 @@ import org.apache.tez.dag.records.TezTaskAttemptID; import org.apache.tez.dag.records.TezTaskID; import org.apache.tez.dag.records.TezVertexID; +import 
org.apache.tez.frameworkplugins.yarn.YarnServerFrameworkService; import org.apache.tez.hadoop.shim.DefaultHadoopShim; import org.apache.tez.runtime.api.Event; import org.apache.tez.runtime.api.InputInitializer; @@ -325,6 +326,7 @@ public void setup() { when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim()); when(appContext.getApplicationID()).thenReturn(appAttemptId.getApplicationId()); when(appContext.getClock()).thenReturn(new SystemClock()); + when(appContext.getAmExtensions()).thenReturn(new YarnServerFrameworkService.YarnAMExtensions()); doAnswer(new Answer() { public ListenableFuture answer(InvocationOnMock invocation) { diff --git a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl2.java b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl2.java index c29a471604..a3b870ff3a 100644 --- a/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl2.java +++ b/tez-dag/src/test/java/org/apache/tez/dag/app/dag/impl/TestVertexImpl2.java @@ -373,13 +373,15 @@ public ExecutionContextTestInfoHolder(VertexExecutionContext vertexExecutionCont this.vertexName = "testvertex"; this.vertexExecutionContext = vertexExecutionContext; this.defaultExecutionContext = defaultDagExecitionContext; + + UserPayload defaultPayload; + try { + defaultPayload = TezUtils.createUserPayloadFromConf(new Configuration(false)); + } catch (IOException e) { + throw new TezUncheckedException(e); + } + if (numPlugins == 0) { // Add default container plugins only - UserPayload defaultPayload; - try { - defaultPayload = TezUtils.createUserPayloadFromConf(new Configuration(false)); - } catch (IOException e) { - throw new TezUncheckedException(e); - } PluginManager.parsePlugin(Lists.newLinkedList(), taskSchedulers, null, true, false, defaultPayload); PluginManager.parsePlugin(Lists.newLinkedList(), containerLaunchers, null, true, false, defaultPayload); PluginManager.parsePlugin(Lists.newLinkedList(), taskComms, null, true, false, 
defaultPayload); @@ -402,9 +404,10 @@ public ExecutionContextTestInfoHolder(VertexExecutionContext vertexExecutionCont DAGProtos.TezEntityDescriptorProto.newBuilder() .setClassName(append(TASK_COMM_NAME_BASE, i))).build()); } - PluginManager.parsePlugin(Lists.newLinkedList(), taskSchedulers, schedulerList, false, false, null); - PluginManager.parsePlugin(Lists.newLinkedList(), containerLaunchers, launcherList, false, false, null); - PluginManager.parsePlugin(Lists.newLinkedList(), taskComms, taskCommList, false, false, null); + PluginManager.parsePlugin(Lists.newLinkedList(), taskSchedulers, schedulerList, false, false, defaultPayload); + PluginManager.parsePlugin(Lists.newLinkedList(), containerLaunchers, launcherList, false, false, + defaultPayload); + PluginManager.parsePlugin(Lists.newLinkedList(), taskComms, taskCommList, false, false, defaultPayload); } this.appContext = createDefaultMockAppContext(); diff --git a/tez-examples/pom.xml b/tez-examples/pom.xml index 18429a459a..7d303d762c 100644 --- a/tez-examples/pom.xml +++ b/tez-examples/pom.xml @@ -71,6 +71,10 @@ commons-cli commons-cli + + org.apache.curator + curator-test + junit junit diff --git a/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java b/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java index 507dc01e11..5716a0a0f6 100644 --- a/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java +++ b/tez-examples/src/main/java/org/apache/tez/examples/TezExampleBase.java @@ -123,7 +123,7 @@ public final int run(String[] args) throws Exception { } hadoopShim = new HadoopShimsLoader(conf).getHadoopShim(); - return _execute(otherArgs, null, null); + return execute(otherArgs, null, null); } /** @@ -160,7 +160,7 @@ public int run(TezConfiguration conf, String[] args, @Nullable TezClient tezClie generateSplitInClient = true; } String[] otherArgs = optionParser.getRemainingArgs(); - return _execute(otherArgs, conf, tezClient); + return execute(otherArgs, 
conf, tezClient); } /** @@ -215,8 +215,7 @@ private int _validateArgs(String[] args) { return 0; } - private int _execute(String[] otherArgs, TezConfiguration tezConf, TezClient tezClient) throws - Exception { + protected int execute(String[] otherArgs, TezConfiguration tezConf, TezClient tezClient) throws Exception { int result = _validateArgs(otherArgs); if (result != 0) { @@ -250,7 +249,7 @@ private int _execute(String[] otherArgs, TezConfiguration tezConf, TezClient tez } } - private TezClient createTezClient(TezConfiguration tezConf) throws IOException, TezException { + protected TezClient createTezClient(TezConfiguration tezConf) throws IOException, TezException { TezClient tezClient = TezClient.create("TezExampleApplication", tezConf); if(reconnectAppId != null) { ApplicationId appId = TezClient.appIdfromString(reconnectAppId); diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/package-info.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/package-info.java new file mode 100644 index 0000000000..4fb471f82a --- /dev/null +++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/package-info.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@Public +@Evolving +package org.apache.tez.runtime.library; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving;