Adds the lazy seek

This can make queries tens of times faster.

While debugging a Presto query performance issue, I observed that
seeking in Sahara-extra is expensive and sometimes even unnecessary.
The best way to avoid both the overhead and the unnecessary seek calls
is to perform the seek only when the client really needs the data.
After this change, the same query in Presto runs 30 times faster.
Both the Presto and S3 clients have adopted similar changes.

Change-Id: I8586af0d481fd08d48620e699467280f7b93150a
This commit is contained in:
Ray Zhang 2016-10-03 10:39:03 -07:00
parent 2e516a87f7
commit ef3c1ab4d5
1 changed files with 22 additions and 0 deletions

View File

@ -89,6 +89,8 @@ class SwiftNativeInputStream extends FSInputStream {
*/
private long rangeOffset = 0;
private long nextReadPosition = 0;
public SwiftNativeInputStream(SwiftNativeFileSystemStore storeNative,
FileSystem.Statistics statistics, Path path, long bufferSize)
throws IOException {
@ -138,6 +140,7 @@ class SwiftNativeInputStream extends FSInputStream {
verifyOpen();
int result = -1;
try {
seekStream();
result = httpStream.read();
} catch (IOException e) {
String msg = "IOException while reading " + path
@ -297,6 +300,13 @@ class SwiftNativeInputStream extends FSInputStream {
*/
@Override
public synchronized void seek(long targetPos) throws IOException {
  // Lazy seek: only remember where the next read should start. The stream
  // itself is repositioned later by seekStream(), so a seek that is never
  // followed by a read does no repositioning work.
  if (targetPos >= 0) {
    nextReadPosition = targetPos;
    return;
  }
  throw new IOException("Negative Seek offset not supported");
}
public synchronized void realSeek(long targetPos) throws IOException {
if (targetPos < 0) {
throw new IOException("Negative Seek offset not supported");
}
@ -344,6 +354,18 @@ class SwiftNativeInputStream extends FSInputStream {
fillBuffer(targetPos);
}
/**
 * Lazy seek: applies the position recorded by {@link #seek(long)}.
 * <p>
 * Does nothing when an HTTP stream is already open and {@code pos}
 * equals {@code nextReadPosition}; otherwise delegates to
 * {@link #realSeek(long)} with the recorded position.
 *
 * @throws IOException if {@link #realSeek(long)} fails
 */
private void seekStream() throws IOException {
  boolean alreadyPositioned = httpStream != null && nextReadPosition == pos;
  if (!alreadyPositioned) {
    realSeek(nextReadPosition);
  }
}
/**
* Fill the buffer from the target position
* If the target position == current position, the