!=========================================================================
! Copyright (C) VMware, Inc. 1986-2011.  All Rights Reserved.
!
! $Id: charact.gs,v 1.10.2.4 2008-03-04 19:03:19 dhenrich Exp $
!
! Superclass Hierarchy:
!   Character, AbstractCharacter, Magnitude, Object.
!
!=========================================================================

removeallmethods Character
removeallclassmethods Character

category: 'For Documentation Installation only'
classmethod: Character
installDocumentation

| doc txt |
doc := GsClassDocumentation newForClass: self.

txt := (GsDocText new) details:
'There are 65536 Characters.  You may not create new instances or subclasses of
 class Character.'.
doc documentClassWith: txt.

txt := (GsDocText new) details:
'Instances of Character respond to the following messages by comparing the
 character codes of the receiver and the argument.  Characters that are equal
 (=) are also identical (==).'.
doc documentCategory: #Comparing with: txt.

self description: doc.
%

category: 'Printable Characters'
classmethod: Character
digits

"Returns an InvariantArray containing Characters representing
 digits 0 through 9."

^#($0 $1 $2 $3 $4 $5 $6 $7 $8 $9)
%

category: 'Printable Characters'
classmethod: Character
lowercaseRoman

"Returns an InvariantArray containing all lower-case Roman ASCII
 characters in alphabetic order."

^#($a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u
   $v $w $x $y $z)
%

category: 'Printable Characters'
classmethod: Character
uppercaseRoman

"Returns an InvariantArray containing all upper-case Roman ASCII
 characters in alphabetic order."

^#($A $B $C $D $E $F $G $H $I $J $K $L $M $N $O $P $Q $R $S $T $U
   $V $W $X $Y $Z)
%

category: 'Non-Printable Characters'
classmethod: Character
backspace

"Returns the ASCII back-space Character."

^ Character withValue: 8
%

category: 'Non-Printable Characters'
classmethod: Character
cr

"Returns the ASCII carriage-return Character."

^ Character withValue: 13
%

category: 'Non-Printable Characters'
classmethod: Character
esc

"Returns the ASCII escape Character."

^ Character withValue: 27
%

category: 'Non-Printable Characters'
classmethod: Character
lf

"Returns the ASCII line-feed Character."

^ Character withValue: 10
%

category: 'Non-Printable Characters'
classmethod: Character
newPage

"Returns the ASCII new-page Character."

^ Character withValue: 12
%

category: 'Non-Printable Characters'
classmethod: Character
space

"Returns the ASCII space Character."

^ Character withValue: 32
%

category: 'Non-Printable Characters'
classmethod: Character
tab

"Returns the ASCII tab Character."

^ Character withValue: 9
%

category: 'Instance Creation'
classmethod: Character
new

"Disallowed.  You may not create new instances of Character."

self shouldNotImplement: #new
%

category: 'Instance Creation'
classmethod: Character
fromStream: aStream

"Returns the next Character in the stream aStream."

self _checkReadStream: aStream forClass: String.
^ aStream next.
%

category: 'Instance Creation'
classmethod: Character
fromString: aString

"If aString is a one-Character String, returns the Character in aString.
 Otherwise, generates an error."

aString _validateClass: String.
(aString size == 1)
  ifTrue: [ ^ aString at: 1 ]
  ifFalse: [ self _errIncorrectFormat: aString ]
%

category: 'Instance Creation'
classmethod: Character
withValue: anInteger

"Returns the Character with the specified value.
 Allowable range is 0 <= anInteger <= 65535."

<primitive: 72>

anInteger _validateClass: Integer.
^ anInteger _error: #numErrArgNotChr args: #[ Character ]
%

category: 'Accessing'
method: Character
asciiValue

"Returns the Unicode code of the receiver (a SmallInteger)."

<primitive: 71>

self _primitiveFailed: #asciiValue .
self _uncontinuableError
%

category: 'Testing'
method: Character
isAlphaNumeric

"Returns true if the receiver is a Roman letter or digit.  Returns false
 otherwise."

^ (self isLetter) _or: [self isDigit]
%

category: 'Case-Insensitive Comparisons'
method: Character
equalsNoCase: aCharacter

"Returns true if the receiver is the same Character as the argument regardless
 of case or internal representation."

<primitive: 75>
"If the primitive fails,
 then aCharacter must not be an instance of Character"

^ aCharacter isEquivalent: self
%

category: 'Case-Insensitive Comparisons'
method: Character
isEquivalent: aCharacter

"Returns true if the receiver is the same Character as the argument regardless
 of case or internal representation."

<primitive: 75>
"If the primitive fails,
 then aCharacter must not be an instance of Character"

^ aCharacter isEquivalent: self
%

!  old non-primitive implementation of isEquivalent:
!
!  (aCharacter class == Character)
!  ifTrue: [ ^self asUppercase == aCharacter asUppercase]
!  ifFalse: [ ^aCharacter isEquivalent: self]
!  %

category: 'Testing'
method: Character
isVowel

"Returns true if the receiver is a vowel ('Y' is considered to be a vowel).
 Returns false otherwise.

This code assumes that the collation sequence places all uppercase variations
of a given letter (including various diacritical marks) immediately following the
plain version of the letter.

"

| ucs arr |
ucs := self asUppercase sortValue.
arr := #($A $E $I $O $U $Y ) .
1 to: arr size do:[:j | | v |
  v := arr at: j .
  ( ucs < v sortValue ) ifTrue: [ ^ 
    false 
  ].
  ( ucs < ( ( Character withValue: ( v asciiValue ) + 1 ) sortValue )) ifTrue:[      ^ true 
  ] 
].
^ false
%


category: 'Testing'
method: Character
isLetter

"Returns true if the receiver is a Roman letter.  Returns false otherwise."

^ self _category < 4
%

category: 'Testing'
method: Character
_type

"Returns 1 for alpha, 2 for digit, and 3 for special."

self isLetter ifTrue: [ ^ 1].
self isDigit  ifTrue: [ ^ 2].
^ 3
%

category: 'Testing'
method: Character
_typeAsSymbol

"Private."

^ #( #alpha #digit #special ) at: self _type 
%

category: 'Testing'
method: Character
isDigit

"Returns true if the receiver is a digit.  Returns false otherwise."

^ self _category == 9
%

category: 'Testing'
method: Character
isLowercase

"Returns true if the receiver is a lower-case character.  Returns false
 otherwise."

^ self _category == 2
%

category: 'Testing'
method: Character
isUppercase

"Returns true if the receiver is an upper-case character.  Returns false
 otherwise."

^ self _category == 1
%

category: 'Testing'
method: Character
isSeparator

"Returns true if the receiver is a separator Character (space, tab,
 carriage-return, line-feed, or new-page).  Returns false otherwise.

Currently returns true for character categories #Zs #Zl #Zp #Cc "
| category |

category := self _category.
( category < 23 ) ifTrue: [ ^ false ].
( category < 27 ) ifTrue: [ ^ true ].
^ false
%

category: 'Comparisons'
method: Character
> aCharacter

"Returns true if the Unicode code of the receiver is greater than the 
 Unicode code of aCharacter. "

<primitive: 182>

(aCharacter class == Character)
  ifTrue:[ self _primitiveFailed: #> .  self _uncontinuableError ].
aCharacter _validateClass: AbstractCharacter.
^ aCharacter < self
%

category: 'Formatting'
method: Character
asString

"Returns a one-Character String or DoubleByteString containing the receiver."

<primitive: 56>

self _primitiveFailed: #asString .
self _uncontinuableError
%

category: 'Formatting'
method: Character
displayWidth

"Returns the width necessary to display the receiver.
 For a Character, this method always returns 1."

^ 1
%

! category: 'Formatting'
! method: Character
! describe
! 
! "Same as asString."
! 
! | result |
! result := String new: 2 .
! result at:1 put: $$ .
! result at:2 put: self .
! ^ result
! %

category: 'Converting'
method: Character
asCharacter

"Returns the receiver."

^ self
%

category: 'Converting'
method: Character
asInteger

"Returns the Unicode value of the receiver."

^self asciiValue
%

category: 'Converting'
method: Character
asJISCharacter

"Returns the JISCharacter corresponding to the receiver."

^ self asInteger asJISCharacter
%

category: 'Converting'
method: Character
asLowercase

"Returns a Character that is the lower-case character corresponding
 to the receiver.  If the receiver is lower-case or has no case, this
 returns the receiver itself."

<primitive: 74>

self _primitiveFailed: #asLowercase .
self _uncontinuableError
%

category: 'Converting'
method: Character
asSymbol

"Returns a one-Character Symbol that represents the receiver."

^ self asString asSymbol
%

category: 'Converting'
method: Character
asUppercase

"Returns a Character that is the upper-case character corresponding
 to the receiver.  If the receiver is upper-case or has no case, this
 returns the receiver itself."

<primitive: 73>

self _primitiveFailed: #asUppercase .
self _uncontinuableError
%

category: 'Converting'
method: Character
digitValue

"Returns a SmallInteger representing the value of the receiver,
 a digit, or returns nil if the receiver is not a digit."

(self isDigit)
  ifTrue: [ ^self asciiValue - $0 asciiValue]
  ifFalse: [ ^nil]
%

category: 'Comparisons'
method: Character
= aCharacter

"Returns true if the receiver and aCharacter are the same Unicode character."

<primitive: 60>

(aCharacter isKindOf: AbstractCharacter)
  ifTrue: [ ^ aCharacter = self ]
  ifFalse: [ ^ false ]
%

category: 'Comparisons'
method: Character
< aCharacter

"Returns true if the Unicode code of the receiver is less than that of
 aCharacter."

<primitive: 61>

(aCharacter class == Character)
  ifTrue:[ self _primitiveFailed: #< .  self _uncontinuableError ].
aCharacter _validateClass: AbstractCharacter.
^ aCharacter > self
%

category: 'Comparisons'
method: Character
<= aCharacter

"Returns true if the Unicode code of the receiver is less than or equal to the
 Unicode code of aCharacter."

<primitive: 181>

(aCharacter class == Character)
  ifTrue:[ self _primitiveFailed: #<= .  self _uncontinuableError ].
aCharacter _validateClass: AbstractCharacter.
^ aCharacter >= self
%

category: 'Comparisons'
method: Character
>= aCharacter

"Returns true if the Unicode code of the receiver is greater than or equal 
 to the Unicode code of aCharacter."

<primitive: 183>

(aCharacter class == Character)
  ifTrue:[ self _primitiveFailed: #>= .  self _uncontinuableError ].
aCharacter _validateClass: AbstractCharacter.
^ aCharacter <= self
%

category: 'Copying'
method: Character
copy

"Returns the receiver.  (Does not create a new Character.)"

^self
%

category: 'Converting'
method: Character
asDigit

"Returns the digit value (0-9) of the receiver.  If the receiver is not
 a digit, this returns 0."

(self < $0) | (self > $9) ifTrue: [^0].
^self asciiValue - $0 asciiValue
%

category: 'Converting'
method: Character
digitValueInRadix: radix

"Returns a SmallInteger representing the value of the receiver, a digit, or
 returns nil if the receiver is not a digit in the given radix."

| val up |
radix == 10 ifTrue: [ ^self digitValue ].
radix < 10 ifTrue: [
  val := self digitValue.
  val >= radix ifTrue: [ ^nil ].
  ^val
].
val := self digitValue.
val ~~ nil ifTrue: [ ^val ].
up := self asUppercase.
($A <= up _and: [ up <= (self class withValue: ($A asciiValue + radix - 11)) ])
ifTrue: [
  ^(up asciiValue - $A asciiValue) + 10
]
ifFalse: [
  ^nil
]
%

category: 'Formatting'
method: Character
printOn: aStream

"Puts a displayable representation of the receiver on the given stream."

aStream nextPut: $$ .
aStream nextPut: self 
%

category: 'Formatting'
method: Character
printString

"Returns a String whose contents are a displayable representation of the
 receiver."

"GemStone does not allow the creation of new kinds of Character, so there
 is no point in creating a stream and sending printOn:."

| result |
result := String new: 2 .
result at: 1 put: $$ .
result at: 2 put: self  .
^ result
%

category: 'Decompiling without Sources'
method: Character
_asSource

"Private."

^ self printString
%

category: 'Storing and Loading'
method: Character
containsIdentity

"Private."

^true
%

category: 'Storing and Loading'
method: Character
writeTo: passiveObj

"Converts the receiver to its passive form and writes that information on
 passiveObj."

| av |
av := self asciiValue .
av <= 255 ifTrue:[ 
  passiveObj nextPut: $$; nextPut: self
  ]
ifFalse:[ 
  passiveObj nextPut: $! ; 
             nextPut: (Character withValue: av // 256 ) ;
             nextPut: (Character withValue: av \\ 256 ) 
  ].
%
category: 'Testing'
method: Character
isSpecial

"Returns true if the receiver is a special object."

^ true
%
category: 'Accessing'
method: Character
sortValue

<primitive: 545>
self _primitiveFailed: #sortValue
%
category: 'Instance Creation'
classmethod: Character
withSortValue: anInt

<primitive: 546>
self primitiveFailed: #withSortValue:
%
!=========================================================================
!
! fix 33886: new instance methods for extended character set operations
!
!=========================================================================

category: 'Testing'
method: Character
_category

" Returns the numeric category id for this character "

<primitive: 640>
self primitiveFailed: #_category
%
category: 'Testing'
method: Character
_categoryAsSymbol

" Returns the category symbol for this character.

Category symbols are taken from the Unicode Standard:

#Lu  - Letter, Uppercase
#Ll - Letter, Lowercase
#Lt  - Letter, Titlecase
#Lm - Letter, Modifier
#Lo - Letter, Other
#Mn - Mark, Nonspacing
#Mc - Mark, Spacing Combining
#Me - Mark, Enclosing
#Nd - Number, Decimal Digit
#Nl - Number, Letter
#No - Number, Other
#Pc - Punctuation, Connector
#Pd - Punctuation, Dash
#Ps - Punctuation, Open/Start
#Pe - Punctuation, Close/End
#Pi - Punctuation, Initial Quote
#Pf - Punctuation, Final Quote
#Po - Punctuation, Other
#Sm - Symbol, Math
#Sc - Symbol, Currency
#Sk - Symbol, Modifier
#So - Symbol, Other
#Zs - Separator, Space
#Zl - Separator, Line
#Zp - Separator, Paragraph
#Cc - Other, Control
#Cf - Other, Format
#Cs - Other, Surrogate
#Co - Other, Private Use
#Cn - Other, Not Assigned

"

^ #( #Lu #Ll #Lt #Lm #Lo #Mn #Mc #Me #Nd #Nl #No #Pc #Pd #Ps 
   #Pe #Pi #Pf #Po #Sm #Sc #Sk #So #Zs #Zl #Zp #Cc #Cf #Cs #Co #Cn) 
at: self _category
%
category: 'Testing'
method: Character
_numericValue

" Returns the numeric value (if one exists) for this character "

<primitive: 641>
self primitiveFailed: #_numericValue
%
category: 'Testing'
method: Character
isNumeric

"Returns true if the receiver contains numeric content ( category is #Nd #Nl #No).  Returns false otherwise."

| category |
category := self _category.
category < 9 ifTrue: [ ^ false ].
category < 12 ifTrue: [ ^ true ].
^ false
%
category: 'Testing'
method: Character
isTitlecase

"Returns true if the receiver is a title-case character.  Returns false
 otherwise."

^ self _category == 3
%
category: 'Converting'
method: Character
asTitlecase

"Returns a Character that is the title-case character corresponding
 to the receiver.  If the receiver is title-case or has no title-case,
 this returns the receiver itself."

<primitive: 642>
self primitiveFailed: #asTitlecase
%

!=========================================================================
!
! fix 33886: new class methods for extended character set mechanisms
!
!=========================================================================

category: 'Character Data Table'
classmethod: Character
_dispatch: dispatchTable index: indexTable category: categoryTable 
    main: mainTable titleCase: titleCaseTable

" Copy byte array tables to internal C tables for String comparisions 

  dispatchTable , indexTable  
     are ByteArrays containing big-endian 16 bit values

  mainTable, titleCaseTable 
     are ByteArrays containing pairs of big-endian 16 bit values

  categoryTable is a ByteArrays containing 8 bit values .

  if dispatchTable == nil,  then other arguments are ignored, and
  the C tables are reset to the hardcoded defaults defined when
  the VM executable/shared library was built .
" 

<primitive: 643>
self primitiveFailed: #_dispatch:index:category:main:titleCase:
%

category: 'Character Data Table'
classmethod: Character
_dumpCharTables

" Dump contents of character tables to stdout  "

   <primitive: 645>
self primitiveFailed: #_dumpCharTables
%
category: 'Character Data Table'
classmethod: Character
_fetchCharTables

" Fetch contents of character tables "

<primitive: 644>
self primitiveFailed: #_fetchCharTables
%
category: 'Character Data Table'
classmethod: Character
_resetCharTablesToCDefaults

" Resets the character data tables in the VM to the hardcoded defaults 
  defined when the VM executable/shared library was built . 
  This defaults are the same as those used at session initialization if
   (Globals at: #CharacterDataTables otherwise: nil) == nil 
"

self _dispatch: nil index: nil category: nil main: nil titleCase: nil
%

category: 'Character Data Table'
classmethod: Character
_loadCharTables

"Load the character data tables recorded in 
( Globals at: #CharacterDataTables ) into the session's internal memory.
This method is called automatically during session login (unless the 
host environmental variable GS_DISABLE_CHARACTER_TABLE_LOAD is set).

This data should be in the form of an Array containing 5 ByteArrays. 

See Character>>installCharTables for information on how to configure this. "

| table dispatchBA indexBA categoryBA mainBA titleCaseBA |

table := ( Globals at: #CharacterDataTables ifAbsent: [ ^ false ] ).
table == nil ifTrue: [ ^ false ].

dispatchBA := table at: 1.
indexBA := table at: 2.
categoryBA := table at: 3.
mainBA := table at: 4.
titleCaseBA := table at: 5.

self _dispatch: dispatchBA index: indexBA category: categoryBA 
      main: mainBA titleCase: titleCaseBA.

^ table
%
category: 'Character Data Table'
classmethod: Character
activateCharTablesFromFile: file

"Install #CharacterDataTables from the passivated contents of a file.

Don't forget to commit to make the change permanent.

You must be SystemUser to execute this method.

See passivateCharTablesToFile: for the other side of this mechanism.

Use $GEMSTONE/goodies/CharacterTableUnicode.dat 
to enable full 16-bit Unicode support.

See $GEMSTONE/goodies/UnicodeData.gs for 
more info on Unicode Standards support.
"

" Check that we're SystemUser "
System myUserProfile userId = 'SystemUser' ifFalse: [
   self _halt: 'Only SystemUser may execute this method' ].

(Globals at: #CharacterDataTables ifAbsent: [ 
    Globals at: #CharacterDataTables put: nil ] ).
Globals at: #CharacterDataTables put: 
    ( PassiveObject fromServerTextFile: file ) activate.
^ self _loadCharTables
%
category: 'Character Data Table'
classmethod: Character
categoryId: aSymbol

"Given a character category symbol, return the numeric id.

See Character>>categorySymbol: for symbol meanings. "

^ #( #Lu #Ll #Lt #Lm #Lo #Mn #Mc #Me #Nd #Nl #No #Pc #Pd #Ps 
   #Pe #Pi #Pf #Po #Sm #Sc #Sk #So #Zs #Zl #Zp #Cc #Cf #Cs #Co #Cn) 
   indexOf: aSymbol
%
category: 'Character Data Table'
classmethod: Character
categorySymbol: id

"Given a character category id, return the category symbol.

Category symbols are taken from the Unicode Standard:

#Lu  - Letter, Uppercase
#Ll - Letter, Lowercase
#Lt  - Letter, Titlecase
#Lm - Letter, Modifier
#Lo - Letter, Other
#Mn - Mark, Nonspacing
#Mc - Mark, Spacing Combining
#Me - Mark, Enclosing
#Nd - Number, Decimal Digit
#Nl - Number, Letter
#No - Number, Other
#Pc - Punctuation, Connector
#Pd - Punctuation, Dash
#Ps - Punctuation, Open/Start
#Pe - Punctuation, Close/End
#Pi - Punctuation, Initial Quote
#Pf - Punctuation, Final Quote
#Po - Punctuation, Other
#Sm - Symbol, Math
#Sc - Symbol, Currency
#Sk - Symbol, Modifier
#So - Symbol, Other
#Zs - Separator, Space
#Zl - Separator, Line
#Zp - Separator, Paragraph
#Cc - Other, Control
#Cf - Other, Format
#Cs - Other, Surrogate
#Co - Other, Private Use
#Cn - Other, Not Assigned
"

^ #( #Lu #Ll #Lt #Lm #Lo #Mn #Mc #Me #Nd #Nl #No #Pc #Pd #Ps 
   #Pe #Pi #Pf #Po #Sm #Sc #Sk #So #Zs #Zl #Zp #Cc #Cf #Cs #Co #Cn) 
   at: id
%
category: 'Character Data Table'
classmethod: Character
charTables

" Reconstruct structured character data tables from the raw byte arrays 
  stored in Globals at: #CharacterDataTables

Returns:

An Array of elements, arranged according to collate order, 
each element an Array of 4 or 5 entries:

1.  The character for this entry.
2.  The symbolic character category code.
3.  Uppercase character ( if a letter ) / Numerator ( if numeric ).
4.  Lowercase character ( if a letter ) / Denominator ( if numeric/fraction ).
5.  (Optional)  Titlecase character ( if a letter )

"

| tables result dispatchBA indexBA categoryBA mainBA titleCaseBA
  titleCaseData titleCaseEntry size entry category id  |

(Globals at: #CharacterDataTables ifAbsent: [ ^ nil ] ).
tables := Globals at: #CharacterDataTables.
result := Array new.

dispatchBA := tables at: 1.
indexBA := tables at: 2.
categoryBA := tables at: 3.
mainBA := tables at: 4.
titleCaseBA := tables at: 5.

" Construct TitleCase Data for later use.. "
titleCaseData := Array new.
0 to: ( titleCaseBA size / 4 - 1 ) do: [ :i |
   entry := Array new.
   entry add: ( titleCaseBA unsigned16At: ( i * 4 + 1 )).
   entry add: ( titleCaseBA unsigned16At: ( i * 4 + 3 )).
   titleCaseData add: entry ].

" Now reconstruct character data table "
size := mainBA size / 4.
0 to: size - 1 do: [ :i |
   entry := Array new.
   id :=  ( indexBA unsigned16At: ( i * 2 + 1 )).
   entry add: ( Character withValue: id ).
   category := self categorySymbol: ( categoryBA unsigned8At: ( i + 1 )).
   entry add: category.
   ( #( #Lu #Ll #Lt ) includes: category ) 
      ifTrue: [ 
         entry add: 
            ( Character withValue: ( mainBA unsigned16At: ( i * 4 + 1 ))).
         entry add: 
            ( Character withValue: ( mainBA unsigned16At: ( i * 4 + 3 ))).
         titleCaseEntry := 
            titleCaseData detect: [ :x | ( x at: 1 ) = id ] ifNone: [ nil ].
         ( titleCaseEntry == nil ) ifFalse: [
            entry add: ( Character withValue: ( titleCaseEntry at: 2 )) ] ]
      ifFalse: [
         entry add: ( mainBA unsigned16At: ( i * 4 + 1 )).
         entry add: ( mainBA unsigned16At: ( i * 4 + 3 )) ].
   result add: entry ].
^ result
%
category: 'Character Data Table'
classmethod: Character
installCharTables: table

"Converts a structured character data table into appropriately formated 
byte arrays and then places them into Globals at: #CharacterDataTables 
for use in this and subsequent sessions.  This operation *does not* 
do a commit -- follow up with a commit if you wish to make this change 
valid for subsequent sessions.

WARNINGS:

Installing incorrectly formatted character table data will break 
character/string operations, including command line processing to a point 
where the system will be impossible to use.  In this case, clear the 
installation by doing the following:

1.  From the OS, set the host machine environmental parameter 
    GS_DISABLE_CHARACTER_TABLE_LOAD to some value 
    (it's the presence of this parameter that enables the mechanism).

2.  Login a new topaz session

3.  Execute Globals at: #CharacterDataTables put: nil.

4.  Commit

Note that changing the collation order of characters in new 
CharacterDataTables will break any indexes that are keyed off of 
Strings/DoubleByteStrings.  Before changing the tables, remove all such 
indexes, install the new tables, and then reconstruct the indexes.

You must be SystemUser to execute this method.

Table Format:

An Array of elements, arranged according to character collate order, 
each element an Array of 4 or 5 entries:

1.  The character for this entry.
2.  The symbolic character category code.  
    See Character>>categorySymbol: for a list.
3.  Uppercase character ( if a letter ) / Numerator ( if numeric ).
4.  Lowercase character ( if a letter ) / Denominator ( if numeric/fraction ).
5.  (Optional)  Titlecase character

"

| tables dispatchBA indexBA categoryBA mainBA titleCaseBA
  mainSize dispatchSize titleCaseSize titleCaseIndex entry item id  |

" Check that we're SystemUser "
System myUserProfile userId = 'SystemUser' ifFalse: [
   self _halt:'Only SystemUser may execute this method' ].

" Generate the ByteArrays "
mainSize := table size.
dispatchSize := 0.
titleCaseSize := 0.
indexBA := ByteArray new: ( mainSize * 2 ).
categoryBA := ByteArray new: mainSize.
mainBA := ByteArray new: ( mainSize * 4 ).
" First pass: do most of the work, 
  get info on sizes for dispatch and titleCase tables "
0 to: mainSize - 1 do: [ :i | 
   entry := table at: i + 1.
   id := ( entry at: 1 ) asciiValue.
   ( id > dispatchSize ) ifTrue: [ dispatchSize := id ].
   ( entry size > 4 ) ifTrue: [ titleCaseSize := titleCaseSize + 1 ].
   indexBA unsigned16At: ( i * 2 + 1 ) put: id.
   categoryBA unsigned8At: ( i + 1 ) put: 
      ( Character categoryId: ( entry at: 2 ) ).
   ((item := entry at: 3) == nil) ifFalse: [ 
      ( item isKindOf: Character )
         ifTrue: [ mainBA unsigned16At: ( i * 4 + 1 ) put: item asciiValue ]
         ifFalse: [ mainBA signed16At: ( i * 4 + 1 ) put: item ] ].
   ((item := entry at: 4) == nil) ifFalse: [
      ( item isKindOf: Character )
         ifTrue: [ mainBA unsigned16At: ( i * 4 + 3 ) put: item asciiValue ]
         ifFalse: [ mainBA signed16At: ( i * 4 + 3 ) put: item ] ] ].

" Second pass: fill in dispatch and titleCase tables "
dispatchSize := dispatchSize + 1.
dispatchBA := ByteArray new: ( dispatchSize * 2 ).
titleCaseBA := ByteArray new: ( titleCaseSize * 4 ).
titleCaseIndex := 0.
1 to: mainSize do: [ :i |
   entry := table at: i.
   id := ( entry at: 1 ) asciiValue.
   dispatchBA unsigned16At: ( id * 2 + 1 ) put: ( i - 1 ).
   ( entry size > 4 ) ifTrue: [
      titleCaseBA unsigned16At: 
         ( titleCaseIndex * 4 + 1 ) put:  id.
      titleCaseBA unsigned16At: 
         ( titleCaseIndex * 4 + 3 ) put: ( entry at: 5 ) asciiValue.
      titleCaseIndex := titleCaseIndex + 1 ] ].

" Setup the table array "
tables := Array new: 5.
tables at: 1 put: dispatchBA.
tables at: 2 put: indexBA.
tables at: 3 put: categoryBA.
tables at: 4 put: mainBA.
tables at: 5 put: titleCaseBA.

" Install on #CharacterDataTables "
(Globals at: #CharacterDataTables ifAbsent: [ 
   Globals at: #CharacterDataTables put: nil ] ).
Globals at: #CharacterDataTables put: tables.

" Now install in this session's internal tables "
self _dispatch: dispatchBA index: indexBA category: categoryBA 
   main: mainBA titleCase: titleCaseBA.

^ tables
%
category: 'Character Data Table'
classmethod: Character
passivateCharTablesToFile: file

"Write the passivated contents of the #CharacterDataTables (if present) 
to a file.

Useful for efficiently porting the CharacterDataTable to another stone.
See activateCharTablesFromFile: for the other side of this mechanism. "

(Globals at: #CharacterDataTables ifAbsent: [ ^nil ] ) 
   passivate toServerTextFile: file
%

