"
TreeDictionary is a dictionary that has good performance in GemStone at a wide variety of sizes, 
and grows gracefully without long pauses.

It is structured as a tree which is sorted based on a permutation of the #hash of the dictionary's keys. 
Nodes of the tree are sized to match the GemStone page size. 
Leaf nodes are page-sized hash tables, and internal nodes are a B+tree variant that allows duplicate keys.


Instance variables:

tally     How many key-value pairs I contain
rootNode  Either a HtDictionaryLeafNode (if my size is small) 
           or a HtDictionaryInternalNode (if my contents will not fit in one leaf node)
walker    A cached instance of HtDictionaryTreeWalker that helps with operations on the tree
scratchLeaf   A cached instance of HtDictionaryScratchLeafNode that is used for temporary storage 
              during the splitting of a leaf node when it becomes full
heap    A cached instance of HtHeap, used to heap-sort a leaf node being split. 
        This is necessary only in uncommon situations where the normal heuristic for splitting a 
        leaf node results in a very uneven split.

The cached instances are all dbTransient, so modifications to them will not need to be written to 
tranlogs or extents upon commit.
They are cached, rather than created at each operation, to avoid creating unnecessary garbage during 
normal operations on the dictionary.
"
Class {
	#name : 'TreeDictionary',
	#superclass : 'AbstractDictionary',
	#instVars : [
		'rootNode',
		'tally',
		'walker',
		'heap',
		'scratchLeaf'
	],
	#category : 'Collections-Dictionaries'
}

{ #category : 'instance creation' }
TreeDictionary class >> new [
	"Answer a freshly allocated instance that has been sent #initialize."

	| instance |
	instance := self basicNew.
	^ instance initialize
]

{ #category : 'instance creation' }
TreeDictionary class >> new: someSize [
	"Pre-sizing is not supported. Report an error that directs the sender
	to use #new instead. The someSize argument is not used."

	self error: 'TreeDictionaries are not pre-sizeable. Send #new instead.'
]

{ #category : 'private' }
TreeDictionary >> _deferredGciUpdateWith: valueArray [
	"The semantics of a GCI update are not defined for this class, so
	unconditionally report #errNoStructuralUpdate. The valueArray argument
	is not used."

	self _error: #'errNoStructuralUpdate'
]

{ #category : 'private' }
TreeDictionary >> _nodesObjectSecurityPolicy: anObjectSecurityPolicy [
	"Assign each of the receiver's components (heap, root node, scratch leaf
	and walker, in that order) to anObjectSecurityPolicy."

	{ heap. rootNode. scratchLeaf. walker }
		do: [ :component | component objectSecurityPolicy: anObjectSecurityPolicy ]
]

{ #category : 'enumerating' }
TreeDictionary >> accompaniedBy: anObj keysAndValuesDo: aBlock [
	"Evaluate the three-argument block aBlock once per key/value pair of the
	receiver, passing anObj, the key and the value, in that order.
	Returns the receiver."

	self
		keysAndValuesDo: [ :eachKey :eachValue | 
			aBlock value: anObj value: eachKey value: eachValue ]
]

{ #category : 'enumerating' }
TreeDictionary >> associationsDo: aBlock [
	"Evaluate the one-argument block aBlock once per key/value pair of the
	receiver, passing a newly created Association holding that pair.
	Answers the receiver.

	A new Association is created for every pair, which hurts performance on
	large collections; prefer keysAndValuesDo: when possible."

	self
		keysAndValuesDo: [ :eachKey :eachValue | 
			| pair |
			pair := Association newWithKey: eachKey value: eachValue.
			aBlock value: pair ].
	^ self
]

{ #category : 'accessing' }
TreeDictionary >> at: key [
	"Answer the value stored under key. Report a key-not-found error if key
	is nil or is not present.
	This deliberately does not delegate to at:ifAbsent:, to avoid creating an
	unnecessary complex block."

	| cachedWalker result |
	key ifNil: [ ^ self _errorKeyNotFound: key ].
	"Holding the walker in a temp ensures it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	(cachedWalker
		searchTree: rootNode
		forValueAt: key
		withHash: (self permutedHashOf: key))
		ifFalse: [ 
			cachedWalker reset.
			^ self _errorKeyNotFound: key ].
	result := cachedWalker value.
	cachedWalker reset.
	^ result
]

{ #category : 'accessing' }
TreeDictionary >> at: key ifAbsent: absentBlock [
	"Answer the value stored under key. If key is not present, answer the
	result of evaluating absentBlock. A nil key is handed to
	_reportKeyNotFound:with: together with absentBlock."

	| cachedWalker wasFound result |
	key ifNil: [ ^ self _reportKeyNotFound: key with: absentBlock ].
	"Holding the walker in a temp ensures it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	wasFound := cachedWalker
		searchTree: rootNode
		forValueAt: key
		withHash: (self permutedHashOf: key).
	wasFound
		ifFalse: [ 
			cachedWalker reset.
			^ absentBlock value ].
	result := cachedWalker value.
	cachedWalker reset.
	^ result
]

{ #category : 'accessing' }
TreeDictionary >> at: key put: value [
	"Store value under key and answer value. A nil key is reported as
	#rtErrNilKey. If the walker answers a new node, the root is split with it."

	| permutedHash splitOffNode cachedWalker |
	key ifNil: [ ^ self _error: #'rtErrNilKey' ].
	"Self-assignment puts the receiver in the write set, for the case where the key is already present."
	tally := tally.
	permutedHash := self permutedHashOf: key.
	"Holding the walker in a temp ensures it stays in memory until this method returns."
	cachedWalker := walker.
	"The walker might have dropped from memory since last use, so re-attach it."
	cachedWalker collection: self.
	splitOffNode := cachedWalker
		searchTree: rootNode
		at: key
		withHash: permutedHash
		put: value.
	splitOffNode == nil
		ifFalse: [ self splitRootWith: splitOffNode ].
	^ value
]

{ #category : 'auditing' }
TreeDictionary >> audit [
	"Check the receiver for consistency. Answer true if the audit passed;
	otherwise answer a String report describing the problems.

	The audit passes when nothing was written to the report after its
	header line."

	| report headerEnd counted |
	report := WriteStream on: String new.
	report nextPutAll: 'Audit report for ' , self class name , ' '.
	self asOop printOn: report.
	report lf.
	"Remember where the header ends; any later output means a problem was reported."
	headerEnd := report position.
	heap auditEmptyOnto: report.
	scratchLeaf auditEmptyOnto: report for: self.
	walker auditOnto: report for: self.
	counted := rootNode
		auditOnto: report
		for: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue.
	counted = tally
		ifFalse: [ 
			report
				nextPutAll:
						'Root tally mis-match: tally is ' , tally printString
								, ' but counting nodes gives ' , counted printString;
				lf ].
	report position = headerEnd
		ifTrue: [ ^ true ].
	^ report contents
]

{ #category : 'node access' }
TreeDictionary >> decrementTally [
	"Decrease the count of key/value pairs by one.
	Intended to be sent only by leaf nodes (the original note names TdLeafNode).

	Reports an error if the receiver is already empty. The check is made
	before tally is modified, so a failed decrement does not leave a
	negative tally behind in the receiver."

	tally > 0
		ifFalse: [ 
			self error: 'decrementTally when already empty'.
			^ self ].
	tally := tally - 1
]

{ #category : 'enumerating' }
TreeDictionary >> do: unaryBlock [
	"Evaluate the one-argument block unaryBlock once per key/value pair of
	the receiver, in no particular order, passing the value of the pair.
	Answers the receiver."

	self
		keysAndValuesDo: [ :ignoredKey :eachValue | unaryBlock value: eachValue ]
]

{ #category : 'private' }
TreeDictionary >> errorNotFound: anObject [
	"Report #objErrNotInColl for anObject, indicating that the expected
	object was not found. Answers whatever the error send answers."

	| errorArgs |
	errorArgs := Array with: anObject.
	^ self _error: #'objErrNotInColl' args: errorArgs
]

{ #category : 'node access' }
TreeDictionary >> heap [
	"Answer my cached heap, created in #initializeDbTransients."

	^ heap
]

{ #category : 'private' }
TreeDictionary >> heapSize [
	"Answer the size passed to HtHeap new: when the cached heap is created
	in #initializeDbTransients. That method notes the size must be greater
	than the fillLine of a leaf."

	^ 800
]

{ #category : 'private' }
TreeDictionary >> highestHash [
	"Answer the largest hash value used in the tree: 2 ** 60 - 1.
	#splitRootWith: uses it as the upper bound of a newly created root."

	^ 16rFFFFFFFFFFFFFFF
]

{ #category : 'node access' }
TreeDictionary >> incrementTally [
	"Increase the count of key/value pairs by one.
	Should only be sent from TdLeafNode.
	NOTE(review): no TdLeafNode appears in this file; #leafNodeClass answers
	HtDictionaryLeafNode, presumably the intended sender - confirm."

	tally := tally + 1
]

{ #category : 'initialization' }
TreeDictionary >> initialize [
	"Set up an empty dictionary: create the cached helpers, zero the tally
	and install a single leaf as the root node. In a sufficiently small
	collection, the root node is a leaf."

	| leafClass |
	super initialize.
	self initializeDbTransients.
	tally := 0.
	leafClass := self leafNodeClass.
	rootNode := leafClass
		forCollection: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue
]

{ #category : 'private' }
TreeDictionary >> initializeDbTransients [
	"Create the cached helper objects: the tree walker, the heap and the
	scratch leaf. They are cached to avoid creating garbage."

	| heapCapacity scratchClass |
	"Cache a walker to avoid creating garbage walkers."
	walker := self walkerClass forCollection: self.
	"The heap size must be greater than the fillLine of a leaf."
	heapCapacity := self heapSize.
	heap := HtHeap new: heapCapacity.
	"In leaf splitting, the scratch leaf is used as a temporary leaf, to avoid creating garbage."
	scratchClass := self scratchLeafNodeClass.
	scratchLeaf := scratchClass
		forCollection: self
		lowestHash: 0
		highestHash: SmallInteger maximumValue
]

{ #category : 'private' }
TreeDictionary >> internalNodeClass [
	"Answer the class used for internal (non-leaf) nodes; #splitRootWith:
	instantiates it when the tree grows one level deeper."

	^ HtDictionaryInternalNode
]

{ #category : 'accessing' }
TreeDictionary >> keyAtValue: anObject ifAbsent: aBlock [
	"Answer the first key encountered whose value is equal (=) to anObject.
	If no value matches, answer the result of evaluating aBlock.
	This enumerates the pairs of the receiver until a match is found."

	self
		keysAndValuesDo: [ :eachKey :eachValue | 
			eachValue = anObject
				ifTrue: [ ^ eachKey ] ].
	^ aBlock value
]

{ #category : 'enumerating' }
TreeDictionary >> keysAndValuesDo: binaryBlock [
	"Evaluate the given two-argument block once for each key/value
	pair I contain, in no particular order, with the key of the pair as
	the first argument, and the value as the second argument.
	The enumeration itself is delegated to the root node.
	Answers the receiver."

	rootNode keysAndValuesDo: binaryBlock
]

{ #category : 'enumerating' }
TreeDictionary >> keysDo: unaryBlock [
	"Evaluate the one-argument block unaryBlock once per key/value pair of
	the receiver, in no particular order, passing the key of the pair.
	Answers the receiver."

	self
		keysAndValuesDo: [ :eachKey :ignoredValue | unaryBlock value: eachKey ]
]

{ #category : 'private' }
TreeDictionary >> leafNodeClass [
	"Answer the class used for leaf nodes; #initialize instantiates it for
	the initial root node."

	^ HtDictionaryLeafNode
]

{ #category : 'accessing' }
TreeDictionary >> permutedHashOf: anObject [
	"Answer the permuted hash under which anObject is filed in the tree.
	This method is central to getting good performance from TreeDictionary.

	Why permute: internal algorithms, such as searching internal nodes and
	splitting leaf nodes, make 'good guesses' about where in a node a hash
	value will be found. For those guesses to be close most of the time, the
	hash values must be roughly evenly distributed through the entire
	non-negative SmallInteger range, [0..2^60). The answers to #hash are
	generally not distributed that way; for instance, String hashes are only
	24 bits, and the hashes of numbers are limited to about 30 bits and may
	be negative.

	What is done: the absolute value of the integer answered by #hash is run
	through a permutation of the [0..2^60) space of numbers. It is a full
	permutation with no loss of uniqueness: each of the 2^60 possible inputs
	produces a unique output in the same range, just scrambled in a way that
	makes it *much* more likely that the permuted hashes used internally by
	TreeDictionary are roughly evenly distributed across the entire range.

	The permutation is

		f(x) = dx^2 + ax + c (mod 2^60)

	where
		a = 10699279521569479
		c = 7836386368351
		d = 7952157022

	Because Smalltalk binary operators evaluate left to right, this is
	equivalent to

		(x * d + a * x + c) bitAnd: 16rFFFFFFFFFFFFFFF

	that is, to

		^ rawHash * 7952157022 + 10699279521569479 * rawHash + 7836386368351
			bitAnd: 16rFFFFFFFFFFFFFFF"

	^ anObject hash abs
		permutedHashA: 10699279521569479
		c: 7836386368351
		d: 7952157022
]

{ #category : 'copying' }
TreeDictionary >> postCopy [
	"Finish a copy of the receiver: give the copy its own cached helper
	objects, then replace the root node with a copy made for this collection."

	| copiedRoot |
	self initializeDbTransients.
	copiedRoot := rootNode copyForCollection: self.
	rootNode := copiedRoot
]

{ #category : 'removing' }
TreeDictionary >> removeKey: key [
	"Remove the key/value pair stored under key and answer its value.
	Report a not-found error via #errorNotFound: if key is nil or is not
	present. If the walker answers a new root, it replaces the current
	root node."

	| hash newRoot theWalker value |
	key ifNil: [ ^ self errorNotFound: key ].
	hash := self permutedHashOf: key.
	theWalker := walker.	"Ensure walker stays in memory until this method returns."
	theWalker collection: self.	"Might have dropped from memory since last use."
	newRoot := theWalker searchTree: rootNode removeKey: key withHash: hash.
	newRoot ifNotNil: [ rootNode := newRoot ].
	theWalker found
		ifFalse: [ 
			theWalker reset.
			^ self errorNotFound: key ].
	"Read the result through the temp that pins the walker, not through the
	instance variable, so every access in this method is to the pinned walker."
	value := theWalker value.
	theWalker reset.
	^ value
]

{ #category : 'removing' }
TreeDictionary >> removeKey: key ifAbsent: absentBlock [
	"Remove the key/value pair stored under key and answer its value.
	If key is not present, answer the result of evaluating absentBlock.
	A nil key can never be present, so it is handed to
	_reportKeyNotFound:with: together with absentBlock, exactly as
	at:ifAbsent: does, instead of unconditionally raising an error.
	If the walker answers a new root, it replaces the current root node."

	| hash newRoot theWalker value |
	key ifNil: [ ^ self _reportKeyNotFound: key with: absentBlock ].
	hash := self permutedHashOf: key.
	theWalker := walker.	"Ensure walker stays in memory until this method returns."
	theWalker collection: self.	"Might have dropped from memory since last use."
	newRoot := theWalker searchTree: rootNode removeKey: key withHash: hash.
	newRoot ifNotNil: [ rootNode := newRoot ].
	theWalker found
		ifFalse: [ 
			theWalker reset.
			^ absentBlock value ].
	"Read the result through the temp that pins the walker, not through the
	instance variable, so every access in this method is to the pinned walker."
	value := theWalker value.
	theWalker reset.
	^ value
]

{ #category : 'test access' }
TreeDictionary >> rootNode [
	"Answer my root node. Categorized as test access."

	^ rootNode
]

{ #category : 'node access' }
TreeDictionary >> scratchLeaf [
	"Answer my cached scratch leaf, created in #initializeDbTransients and
	used as a temporary leaf during leaf splitting."

	^ scratchLeaf
]

{ #category : 'private' }
TreeDictionary >> scratchLeafNodeClass [
	"Answer the class instantiated for the cached scratch leaf in
	#initializeDbTransients."

	^ HtDictionaryScratchLeafNode
]

{ #category : 'accessing' }
TreeDictionary >> size [
	"Answer the number of key/value pairs I contain, as kept in tally."

	^ tally
]

{ #category : 'private' }
TreeDictionary >> splitRootWith: newNode [
	"The tree is getting one level deeper: build a new internal node whose
	children are the current root followed by newNode, bound it by the
	highest hash, and install it as the root."

	| grownRoot topHash |
	topHash := self highestHash.
	grownRoot := self internalNodeClass forCollection: self.
	grownRoot appendSortedChild: rootNode.
	grownRoot appendSortedChild: newNode.
	grownRoot appendHash: topHash.
	grownRoot highestHash: topHash.
	grownRoot computeConstants.
	rootNode := grownRoot
]

{ #category : 'enumerating' }
TreeDictionary >> valuesDo: aBlock [
	"Evaluate the one-argument block aBlock once per key/value pair of the
	receiver, passing the value of the pair."

	self
		keysAndValuesDo: [ :ignoredKey :eachValue | aBlock value: eachValue ]
]

{ #category : 'private' }
TreeDictionary >> walkerClass [
	"Answer the class instantiated for the cached tree walker in
	#initializeDbTransients."

	^ HtDictionaryTreeWalker
]
