Enhance fail-fast-on-error functionality

Make persister fail faster on errors.
Use one executor service for all persister threads.
Shutdown all threads in thread executor service on any error.
Catch java.lang.Throwable to catch java.lang.Error as well as java.lang.Exception.

Change-Id: I0dc421cf6bb4ab3f52c47e97b7f396483283b561
This commit is contained in:
Deklan Dieterly 2015-08-18 10:25:22 -06:00
parent 53fd650296
commit 452c020f63
5 changed files with 108 additions and 38 deletions

View File

@ -17,6 +17,7 @@
package monasca.persister; package monasca.persister;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Guice; import com.google.inject.Guice;
import com.google.inject.Injector; import com.google.inject.Injector;
import com.google.inject.Key; import com.google.inject.Key;
@ -25,6 +26,10 @@ import com.google.inject.TypeLiteral;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import io.dropwizard.Application; import io.dropwizard.Application;
import io.dropwizard.setup.Bootstrap; import io.dropwizard.setup.Bootstrap;
import io.dropwizard.setup.Environment; import io.dropwizard.setup.Environment;
@ -105,8 +110,18 @@ public class PersisterApplication extends Application<PersisterConfig> {
injector.getInstance(Key.get(new TypeLiteral<KafkaConsumerFactory<MetricEnvelope[]>>(){})); injector.getInstance(Key.get(new TypeLiteral<KafkaConsumerFactory<MetricEnvelope[]>>(){}));
final KafkaConsumerRunnableBasicFactory<MetricEnvelope[]> kafkaMetricConsumerRunnableBasicFactory = final KafkaConsumerRunnableBasicFactory<MetricEnvelope[]> kafkaMetricConsumerRunnableBasicFactory =
injector.getInstance(Key.get(new TypeLiteral<KafkaConsumerRunnableBasicFactory injector.getInstance(
<MetricEnvelope[]>>(){})); Key.get(new TypeLiteral<KafkaConsumerRunnableBasicFactory<MetricEnvelope[]>>() {
}));
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setDaemon(true)
.build();
int totalNumberOfThreads = configuration.getMetricConfiguration().getNumThreads()
+ configuration.getAlarmHistoryConfiguration().getNumThreads();
ExecutorService executorService = Executors.newFixedThreadPool(totalNumberOfThreads, threadFactory);
for (int i = 0; i < configuration.getMetricConfiguration().getNumThreads(); i++) { for (int i = 0; i < configuration.getMetricConfiguration().getNumThreads(); i++) {
@ -122,7 +137,7 @@ public class PersisterApplication extends Application<PersisterConfig> {
kafkaMetricConsumerRunnableBasicFactory.create(managedMetricPipeline, kafkaMetricChannel, threadId); kafkaMetricConsumerRunnableBasicFactory.create(managedMetricPipeline, kafkaMetricChannel, threadId);
final KafkaConsumer<MetricEnvelope[]> kafkaMetricConsumer = final KafkaConsumer<MetricEnvelope[]> kafkaMetricConsumer =
kafkaMetricConsumerFactory.create(kafkaMetricConsumerRunnableBasic, threadId); kafkaMetricConsumerFactory.create(kafkaMetricConsumerRunnableBasic, threadId, executorService);
ManagedConsumer<MetricEnvelope[]> managedMetricConsumer = ManagedConsumer<MetricEnvelope[]> managedMetricConsumer =
metricManagedConsumerFactory.create(kafkaMetricConsumer, threadId); metricManagedConsumerFactory.create(kafkaMetricConsumer, threadId);
@ -158,7 +173,8 @@ public class PersisterApplication extends Application<PersisterConfig> {
kafkaAlarmStateTransitionConsumerRunnableBasicFactory.create(managedAlarmStateTransitionPipeline, kafkaAlarmStateTransitionChannel, threadId); kafkaAlarmStateTransitionConsumerRunnableBasicFactory.create(managedAlarmStateTransitionPipeline, kafkaAlarmStateTransitionChannel, threadId);
final KafkaConsumer<AlarmStateTransitionedEvent> kafkaAlarmStateTransitionConsumer = final KafkaConsumer<AlarmStateTransitionedEvent> kafkaAlarmStateTransitionConsumer =
kafkaAlarmStateTransitionConsumerFactory.create(kafkaAlarmStateTransitionConsumerRunnableBasic, threadId); kafkaAlarmStateTransitionConsumerFactory.create(kafkaAlarmStateTransitionConsumerRunnableBasic, threadId,
executorService);
ManagedConsumer<AlarmStateTransitionedEvent> managedAlarmStateTransitionConsumer = ManagedConsumer<AlarmStateTransitionedEvent> managedAlarmStateTransitionConsumer =
alarmStateTransitionsManagedConsumerFactory.create(kafkaAlarmStateTransitionConsumer, threadId); alarmStateTransitionsManagedConsumerFactory.create(kafkaAlarmStateTransitionConsumer, threadId);

View File

@ -33,7 +33,7 @@ public class KafkaConsumer<T> {
private static final Logger logger = LoggerFactory.getLogger(KafkaConsumer.class); private static final Logger logger = LoggerFactory.getLogger(KafkaConsumer.class);
private static final int WAIT_TIME = 10; private static final int WAIT_TIME = 5;
private ExecutorService executorService; private ExecutorService executorService;
@ -43,10 +43,12 @@ public class KafkaConsumer<T> {
@Inject @Inject
public KafkaConsumer( public KafkaConsumer(
@Assisted KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic, @Assisted KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic,
@Assisted String threadId) { @Assisted String threadId,
@Assisted ExecutorService executorService) {
this.kafkaConsumerRunnableBasic = kafkaConsumerRunnableBasic; this.kafkaConsumerRunnableBasic = kafkaConsumerRunnableBasic;
this.threadId = threadId; this.threadId = threadId;
this.executorService = executorService;
} }
@ -54,13 +56,6 @@ public class KafkaConsumer<T> {
logger.info("[{}]: start", this.threadId); logger.info("[{}]: start", this.threadId);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat(threadId + "-%d")
.setDaemon(true)
.build();
executorService = Executors.newSingleThreadExecutor(threadFactory);
executorService.submit(kafkaConsumerRunnableBasic.setExecutorService(executorService)); executorService.submit(kafkaConsumerRunnableBasic.setExecutorService(executorService));
} }
@ -75,8 +70,6 @@ public class KafkaConsumer<T> {
logger.info("[{}]: shutting down executor service", this.threadId); logger.info("[{}]: shutting down executor service", this.threadId);
executorService.shutdown();
try { try {
logger.info("[{}]: awaiting termination...", this.threadId); logger.info("[{}]: awaiting termination...", this.threadId);

View File

@ -17,10 +17,13 @@
package monasca.persister.consumer; package monasca.persister.consumer;
import java.util.concurrent.ExecutorService;
public interface KafkaConsumerFactory<T> { public interface KafkaConsumerFactory<T> {
KafkaConsumer<T> create( KafkaConsumer<T> create(
KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic, KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic,
String threadId); String threadId,
ExecutorService executorService);
} }

View File

@ -27,8 +27,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import kafka.consumer.ConsumerIterator; import kafka.consumer.ConsumerIterator;
import monasca.persister.repository.RepoException;
public class KafkaConsumerRunnableBasic<T> implements Runnable { public class KafkaConsumerRunnableBasic<T> implements Runnable {
@ -38,6 +40,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
private final String threadId; private final String threadId;
private final ManagedPipeline<T> pipeline; private final ManagedPipeline<T> pipeline;
private volatile boolean stop = false; private volatile boolean stop = false;
private boolean fatalErrorDetected = false;
private ExecutorService executorService; private ExecutorService executorService;
@ -60,7 +63,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
} }
protected void publishHeartbeat() { protected void publishHeartbeat() throws RepoException {
publishEvent(null); publishEvent(null);
@ -82,9 +85,19 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
try { try {
if (pipeline.shutdown()) { if (!this.fatalErrorDetected) {
markRead(); logger.info("[{}}: shutting pipeline down", this.threadId);
if (pipeline.shutdown()) {
markRead();
}
} else {
logger.info("[{}]: fatal error detected. Exiting immediately without flush", this.threadId);
} }
@ -93,6 +106,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
logger.error("caught fatal exception while shutting down", e); logger.error("caught fatal exception while shutting down", e);
} }
} }
public void run() { public void run() {
@ -103,12 +117,28 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
logger.debug("[{}]: KafkaChannel has stream iterator", this.threadId); logger.debug("[{}]: KafkaChannel has stream iterator", this.threadId);
while (!this.stop) { while (!this.stop) {
try {
try { try {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
if (it.hasNext()) { if (it.hasNext()) {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
final String msg = new String(it.next().message()); final String msg = new String(it.next().message());
logger.debug("[{}]: {}", this.threadId, msg); logger.debug("[{}]: {}", this.threadId, msg);
@ -119,45 +149,73 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
} catch (kafka.consumer.ConsumerTimeoutException cte) { } catch (kafka.consumer.ConsumerTimeoutException cte) {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
publishHeartbeat(); publishHeartbeat();
} }
if (Thread.currentThread().isInterrupted()) { } catch (Throwable e) {
logger.debug("[{}]: is interrupted. breaking out of run loop", this.threadId); logger.error(
"[{}]: caught fatal exception while publishing msg. Shutting entire persister down now!",
this.threadId, e);
break; this.stop = true;
this.fatalErrorDetected = true;
this.executorService.shutdownNow();
try {
this.executorService.awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e1) {
logger.info("[{}]: interrupted while awaiting termination", this.threadId, e1);
} }
LogManager.shutdown();
System.exit(1);
} }
logger.info("[{}]: shutting down", this.threadId);
this.kafkaChannel.stop();
} }
logger.info("[{}]: shutting down", this.threadId);
protected void publishEvent(final String msg) { this.kafkaChannel.stop();
try { }
if (pipeline.publishEvent(msg)) { protected void publishEvent(final String msg) throws RepoException {
markRead(); if (pipeline.publishEvent(msg)) {
} markRead();
} catch (Exception e) { }
logger.error("caught fatal exception while publishing msg. Shutting entire persister down now!"); }
this.executorService.shutdownNow(); private boolean isInterrupted() {
LogManager.shutdown(); if (Thread.currentThread().interrupted()) {
System.exit(-1); logger.debug("[{}]: is interrupted. breaking out of run loop", this.threadId);
return true;
} else {
return false;
} }
} }

View File

@ -450,7 +450,7 @@ public class VerticaMetricRepo extends VerticaRepo implements Repo<MetricEnvelop
if (!definitionDimensionsIdSet.contains(defDimsId)) { if (!definitionDimensionsIdSet.contains(defDimsId)) {
logger.debug("[{}]: adding definitionDimension to batch: defDimsId: {}, defId: {}, dimId: {}", logger.debug("[{}]: adding definitionDimension to batch: defDimsId: {}, defId: {}, dimId: {}",
defDimsId.toHexString(), defId, dimId, id); id, defDimsId.toHexString(), defId, dimId);
stagedDefinitionDimensionsBatch.add() stagedDefinitionDimensionsBatch.add()
.bind("id", defDimsId.getSha1Hash()) .bind("id", defDimsId.getSha1Hash())