On Wed, Mar 9, 2016 at 1:39 AM, Robert Haas <robertmh...@gmail.com> wrote:
> LockWaiterCount() bravely accesses a shared memory data structure that > is mutable with no locking at all. That might actually be safe for > our purposes, but I think it would be better to take the partition > lock in shared mode if it doesn't cost too much performance. If > that's too expensive, then it should at least have a comment > explaining (1) that it is doing this without the lock and (2) why > that's safe (sketch: the LOCK can't go away because we hold it, and > nRequested could change but we don't mind a stale value, and a 32-bit > read won't be torn). > With LWLock also performance are equally good so added the lock. > > A few of the other functions in here also lack comments, and perhaps > should have them. > > The changes to RelationGetBufferForTuple need a visit from the > refactoring police. Look at the way you are calling > RelationAddOneBlock. The logic looks about like this: > > if (needLock) > { > if (trylock relation for extension) > RelationAddOneBlock(); > else > { > lock relation for extension; > if (use fsm) > { > complicated; > } > else > RelationAddOneBlock(); > } > else > RelationAddOneBlock(); > > So basically you want to do the RelationAddOneBlock() thing if > !needLock || !use_fsm || can't trylock. See if you can rearrange the > code so that there's only one fallthrough call to > RelationAddOneBlock() instead of three separate ones. > Actually in every case we need one blocks, So I have re factored it and RelationAddOneBlock is now out of any condition. > > Also, consider moving the code that adds multiple blocks at a time to > its own function instead of including it all in line. > Done Attaching a latest patch. -- Regards, Dilip Kumar EnterpriseDB: http://www.enterprisedb.com
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 8140418..eb4ee0c 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -169,6 +169,86 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, } /* + * RelationAddOneBlock + * + * Extend relation by one block and lock the buffer + */ +static Buffer +RelationAddOneBlock(Relation relation, Buffer otherBuffer, BulkInsertState bistate) +{ + Buffer buffer; + /* + * XXX This does an lseek - rather expensive - but at the moment it is the + * only way to accurately determine how many blocks are in a relation. Is + * it worth keeping an accurate file length in shared memory someplace, + * rather than relying on the kernel to do it for us? + */ + buffer = ReadBufferBI(relation, P_NEW, bistate); + + /* + * We can be certain that locking the otherBuffer first is OK, since + * it must have a lower page number. + */ + if (otherBuffer != InvalidBuffer) + LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Now acquire lock on the new page. + */ + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + return buffer; +} +/* + * RelationAddExtraBlocks + * + * Extend extra blocks for the relations to avoid the future contention + * on the relation extension lock. + */ + +static void +RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) +{ + Page page; + Size freespace; + BlockNumber blockNum; + int extraBlocks = 0; + int lockWaiters = 0; + Buffer buffer; + + /* + * For calculating number of extra blocks to extend, find the level + * of contention on this lock, by getting the requester of this lock + * and add extra blocks in multiple of waiters. + */ + lockWaiters = RelationExtensionLockWaiter(relation); + + extraBlocks = lockWaiters * 20; + + while (extraBlocks--) + { + /* + * Here we are adding extra blocks to the relation after + * adding each block update the information in FSM so that + * other backend running parallel can find the block. + */ + buffer = ReadBufferBI(relation, P_NEW, bistate); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buffer); + PageInit(page, BufferGetPageSize(buffer), 0); + freespace = PageGetHeapFreeSpace(page); + MarkBufferDirty(buffer); + blockNum = BufferGetBlockNumber(buffer); + + UnlockReleaseBuffer(buffer); + + RecordPageWithFreeSpace(relation, blockNum, freespace); + } +} + +/* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation @@ -233,10 +313,11 @@ RelationGetBufferForTuple(Relation relation, Size len, bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; - Size pageFreeSpace, - saveFreeSpace; + Size pageFreeSpace = 0, + saveFreeSpace = 0; BlockNumber targetBlock, - otherBlock; + otherBlock, + lastValidBlock = InvalidBlockNumber; bool needLock; len = MAXALIGN(len); /* be conservative */ @@ -308,6 +389,7 @@ RelationGetBufferForTuple(Relation relation, Size len, } } +loop: while (targetBlock != InvalidBlockNumber) { /* @@ -388,6 +470,8 @@ RelationGetBufferForTuple(Relation relation, Size len, otherBlock, targetBlock, vmbuffer_other, vmbuffer); + lastValidBlock = targetBlock; + /* * Now we can check to see if there's enough free space here. If so, * we're done. @@ -441,37 +525,53 @@ RelationGetBufferForTuple(Relation relation, Size len, needLock = !RELATION_IS_LOCAL(relation); if (needLock) - LockRelationForExtension(relation, ExclusiveLock); - - /* - * XXX This does an lseek - rather expensive - but at the moment it is the - * only way to accurately determine how many blocks are in a relation. Is - * it worth keeping an accurate file length in shared memory someplace, - * rather than relying on the kernel to do it for us? - */ - buffer = ReadBufferBI(relation, P_NEW, bistate); - - /* - * We can be certain that locking the otherBuffer first is OK, since it - * must have a lower page number. - */ - if (otherBuffer != InvalidBuffer) - LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); + { + /* + * First try to get the lock in no-wait mode, if succeed extend one + * block, else get the lock in normal mode and after we get the lock + * extend some extra blocks, extra blocks will be added to satisfy + * request of other waiters and avoid future contention. Here instead + * of directly taking lock we try no-wait mode, this is to handle the + * case, when there is no contention - it should not find the lock + * waiter and execute extra instructions. + */ + if (LOCKACQUIRE_OK + != RelationExtensionLockConditional(relation, ExclusiveLock)) + { + LockRelationForExtension(relation, ExclusiveLock); + + if (use_fsm) + { + if (lastValidBlock != InvalidBlockNumber) + { + targetBlock = RecordAndGetPageWithFreeSpace(relation, + lastValidBlock, + pageFreeSpace, + len + saveFreeSpace); + } + + /* Other waiter has extended the block for us*/ + if (targetBlock != InvalidBlockNumber) + { + UnlockRelationForExtension(relation, ExclusiveLock); + goto loop; + } + + RelationAddExtraBlocks(relation, bistate); + } + } + } - /* - * Now acquire lock on the new page. + /* For all case we need to add at least one block to satisfy our + * own request. */ - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + buffer = RelationAddOneBlock(relation, otherBuffer, bistate); - /* - * Release the file-extension lock; it's now OK for someone else to extend - * the relation some more. Note that we cannot release this lock before - * we have buffer lock on the new page, or we risk a race condition - * against vacuumlazy.c --- see comments therein. - */ if (needLock) UnlockRelationForExtension(relation, ExclusiveLock); + + /* * We need to initialize the empty new page. Double-check that it really * is empty (this should never happen, but if it does we don't want to diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 9d16afb..5d9454d 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -341,6 +341,40 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode) } /* + * RelationExtensionLockConditional + * + * Same as LockRelationForExtension except it will not wait on the lock. + */ +LockAcquireResult +RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return LockAcquire(&tag, lockmode, false, true); +} + +/* + * RelationExtensionLockWaiter + * + * Count the lock requester for the RelationExtension lock. + */ +int +RelationExtensionLockWaiter(Relation relation) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return LockWaiterCount(&tag); +} + +/* * UnlockRelationForExtension */ void diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index a458c68..cf723ae 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -4380,3 +4380,47 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) LockRelease(&tag, ShareLock, false); return true; } + +/* + * LockWaiterCount + * + * Find the number of lock requester on this locktag + */ +int +LockWaiterCount(const LOCKTAG *locktag) +{ + LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; + LOCALLOCKTAG localtag; + LOCALLOCK *locallock; + bool found; + uint32 hashcode; + LWLock *partitionLock; + int waiters = 0; + + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) + elog(ERROR, "unrecognized lock method: %d", lockmethodid); + + /* + * Find a LOCALLOCK entry for this lock and lockmode + */ + MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */ + localtag.lock = *locktag; + localtag.mode = ExclusiveLock; + + locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash, + (void *) &localtag, + HASH_FIND, &found); + + + hashcode = locallock->hashcode; + + partitionLock = LockHashPartitionLock(hashcode); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + if (found) + waiters = locallock->lock->nRequested; + + LWLockRelease(partitionLock); + + return waiters; +} diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index e9d41bf..ea8e19e 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -101,4 +101,7 @@ extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid /* Describe a locktag for error messages */ extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag); +extern LockAcquireResult RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode); +extern int RelationExtensionLockWaiter(Relation relation); + #endif /* LMGR_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 788d50a..3fd74fb 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -572,6 +572,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1, PGPROC *proc2); extern void InitDeadLockChecking(void); +extern int LockWaiterCount(const LOCKTAG *locktag); + #ifdef LOCK_DEBUG extern void DumpLocks(PGPROC *proc); extern void DumpAllLocks(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers