Enhance fail fast on error functionality

Make persister fail faster on errors.
Use one executor service for all persister threads.
Shut down all threads in the shared executor service on any error.
Catch java.lang.Throwable to catch java.lang.Error as well as java.lang.Exception.

Change-Id: I0dc421cf6bb4ab3f52c47e97b7f396483283b561
This commit is contained in:
Deklan Dieterly 2015-08-18 10:25:22 -06:00
parent 53fd650296
commit 452c020f63
5 changed files with 108 additions and 38 deletions

View File

@ -17,6 +17,7 @@
package monasca.persister;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
@ -25,6 +26,10 @@ import com.google.inject.TypeLiteral;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import io.dropwizard.Application;
import io.dropwizard.setup.Bootstrap;
import io.dropwizard.setup.Environment;
@ -105,8 +110,18 @@ public class PersisterApplication extends Application<PersisterConfig> {
injector.getInstance(Key.get(new TypeLiteral<KafkaConsumerFactory<MetricEnvelope[]>>(){}));
final KafkaConsumerRunnableBasicFactory<MetricEnvelope[]> kafkaMetricConsumerRunnableBasicFactory =
injector.getInstance(Key.get(new TypeLiteral<KafkaConsumerRunnableBasicFactory
<MetricEnvelope[]>>(){}));
injector.getInstance(
Key.get(new TypeLiteral<KafkaConsumerRunnableBasicFactory<MetricEnvelope[]>>() {
}));
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setDaemon(true)
.build();
int totalNumberOfThreads = configuration.getMetricConfiguration().getNumThreads()
+ configuration.getAlarmHistoryConfiguration().getNumThreads();
ExecutorService executorService = Executors.newFixedThreadPool(totalNumberOfThreads, threadFactory);
for (int i = 0; i < configuration.getMetricConfiguration().getNumThreads(); i++) {
@ -122,7 +137,7 @@ public class PersisterApplication extends Application<PersisterConfig> {
kafkaMetricConsumerRunnableBasicFactory.create(managedMetricPipeline, kafkaMetricChannel, threadId);
final KafkaConsumer<MetricEnvelope[]> kafkaMetricConsumer =
kafkaMetricConsumerFactory.create(kafkaMetricConsumerRunnableBasic, threadId);
kafkaMetricConsumerFactory.create(kafkaMetricConsumerRunnableBasic, threadId, executorService);
ManagedConsumer<MetricEnvelope[]> managedMetricConsumer =
metricManagedConsumerFactory.create(kafkaMetricConsumer, threadId);
@ -158,7 +173,8 @@ public class PersisterApplication extends Application<PersisterConfig> {
kafkaAlarmStateTransitionConsumerRunnableBasicFactory.create(managedAlarmStateTransitionPipeline, kafkaAlarmStateTransitionChannel, threadId);
final KafkaConsumer<AlarmStateTransitionedEvent> kafkaAlarmStateTransitionConsumer =
kafkaAlarmStateTransitionConsumerFactory.create(kafkaAlarmStateTransitionConsumerRunnableBasic, threadId);
kafkaAlarmStateTransitionConsumerFactory.create(kafkaAlarmStateTransitionConsumerRunnableBasic, threadId,
executorService);
ManagedConsumer<AlarmStateTransitionedEvent> managedAlarmStateTransitionConsumer =
alarmStateTransitionsManagedConsumerFactory.create(kafkaAlarmStateTransitionConsumer, threadId);

View File

@ -33,7 +33,7 @@ public class KafkaConsumer<T> {
private static final Logger logger = LoggerFactory.getLogger(KafkaConsumer.class);
private static final int WAIT_TIME = 10;
private static final int WAIT_TIME = 5;
private ExecutorService executorService;
@ -43,10 +43,12 @@ public class KafkaConsumer<T> {
@Inject
public KafkaConsumer(
@Assisted KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic,
@Assisted String threadId) {
@Assisted String threadId,
@Assisted ExecutorService executorService) {
this.kafkaConsumerRunnableBasic = kafkaConsumerRunnableBasic;
this.threadId = threadId;
this.executorService = executorService;
}
@ -54,13 +56,6 @@ public class KafkaConsumer<T> {
logger.info("[{}]: start", this.threadId);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat(threadId + "-%d")
.setDaemon(true)
.build();
executorService = Executors.newSingleThreadExecutor(threadFactory);
executorService.submit(kafkaConsumerRunnableBasic.setExecutorService(executorService));
}
@ -75,8 +70,6 @@ public class KafkaConsumer<T> {
logger.info("[{}]: shutting down executor service", this.threadId);
executorService.shutdown();
try {
logger.info("[{}]: awaiting termination...", this.threadId);

View File

@ -17,10 +17,13 @@
package monasca.persister.consumer;
import java.util.concurrent.ExecutorService;
public interface KafkaConsumerFactory<T> {
KafkaConsumer<T> create(
KafkaConsumerRunnableBasic<T> kafkaConsumerRunnableBasic,
String threadId);
String threadId,
ExecutorService executorService);
}

View File

@ -27,8 +27,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import kafka.consumer.ConsumerIterator;
import monasca.persister.repository.RepoException;
public class KafkaConsumerRunnableBasic<T> implements Runnable {
@ -38,6 +40,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
private final String threadId;
private final ManagedPipeline<T> pipeline;
private volatile boolean stop = false;
private boolean fatalErrorDetected = false;
private ExecutorService executorService;
@ -60,7 +63,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
}
protected void publishHeartbeat() {
protected void publishHeartbeat() throws RepoException {
publishEvent(null);
@ -82,9 +85,19 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
try {
if (pipeline.shutdown()) {
if (!this.fatalErrorDetected) {
markRead();
logger.info("[{}}: shutting pipeline down", this.threadId);
if (pipeline.shutdown()) {
markRead();
}
} else {
logger.info("[{}]: fatal error detected. Exiting immediately without flush", this.threadId);
}
@ -93,6 +106,7 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
logger.error("caught fatal exception while shutting down", e);
}
}
public void run() {
@ -103,12 +117,28 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
logger.debug("[{}]: KafkaChannel has stream iterator", this.threadId);
while (!this.stop) {
while (!this.stop) {
try {
try {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
if (it.hasNext()) {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
final String msg = new String(it.next().message());
logger.debug("[{}]: {}", this.threadId, msg);
@ -119,45 +149,73 @@ public class KafkaConsumerRunnableBasic<T> implements Runnable {
} catch (kafka.consumer.ConsumerTimeoutException cte) {
if (isInterrupted()) {
this.fatalErrorDetected = true;
break;
}
publishHeartbeat();
}
if (Thread.currentThread().isInterrupted()) {
} catch (Throwable e) {
logger.debug("[{}]: is interrupted. breaking out of run loop", this.threadId);
logger.error(
"[{}]: caught fatal exception while publishing msg. Shutting entire persister down now!",
this.threadId, e);
break;
this.stop = true;
this.fatalErrorDetected = true;
this.executorService.shutdownNow();
try {
this.executorService.awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException e1) {
logger.info("[{}]: interrupted while awaiting termination", this.threadId, e1);
}
LogManager.shutdown();
System.exit(1);
}
logger.info("[{}]: shutting down", this.threadId);
this.kafkaChannel.stop();
}
logger.info("[{}]: shutting down", this.threadId);
protected void publishEvent(final String msg) {
this.kafkaChannel.stop();
try {
}
if (pipeline.publishEvent(msg)) {
protected void publishEvent(final String msg) throws RepoException {
markRead();
if (pipeline.publishEvent(msg)) {
}
markRead();
} catch (Exception e) {
}
logger.error("caught fatal exception while publishing msg. Shutting entire persister down now!");
}
this.executorService.shutdownNow();
private boolean isInterrupted() {
LogManager.shutdown();
if (Thread.currentThread().interrupted()) {
System.exit(-1);
logger.debug("[{}]: is interrupted. breaking out of run loop", this.threadId);
return true;
} else {
return false;
}
}

View File

@ -450,7 +450,7 @@ public class VerticaMetricRepo extends VerticaRepo implements Repo<MetricEnvelop
if (!definitionDimensionsIdSet.contains(defDimsId)) {
logger.debug("[{}]: adding definitionDimension to batch: defDimsId: {}, defId: {}, dimId: {}",
defDimsId.toHexString(), defId, dimId, id);
id, defDimsId.toHexString(), defId, dimId);
stagedDefinitionDimensionsBatch.add()
.bind("id", defDimsId.getSha1Hash())