@@ -15,8 +15,10 @@ public string Decode(ReadOnlySpan<long> tokenIds)
1515 _ = _prefixes ?? throw new InvalidOperationException ( "Vocabulary not loaded." ) ;
1616 _ = _suffixes ?? throw new InvalidOperationException ( "Vocabulary not loaded." ) ;
1717
18- _decodeSuffixes ??= _suffixes . ToDictionary ( x => x . Value , x => x . Key . ToString ( ) ) ;
19- _decodePrefixes ??= _prefixes . ToDictionary ( x => x . Value , x => x . Key . ToString ( ) ) ;
18+ if ( _decodeSuffixes is null || _decodePrefixes is null )
19+ {
20+ InitializeDecodeDictionaries ( ) ;
21+ }
2022
2123 if ( tokenIds . Length == 0 )
2224 {
@@ -72,9 +74,24 @@ public string Decode(ReadOnlySpan<long> tokenIds)
// See https://github.com/huggingface/tokenizers/blob/daf361676bdfd14088f7e0bc087effc6a9cfdf3e/tokenizers/src/decoders/wordpiece.rs#L31
/// <summary>
/// Reports whether <paramref name="prefix"/> is exactly one of the single-character
/// strings ".", "?", "!" or ",".
/// </summary>
/// <param name="prefix">The text to test; it is dereferenced, so it must not be null.</param>
/// <returns>
/// <c>true</c> when <paramref name="prefix"/> has length 1 and that character is one of
/// the four punctuation marks above; otherwise <c>false</c>.
/// </returns>
private bool EmitNoSpaceBefore(string prefix)
{
    // Anything that is not a single character can never match.
    if (prefix.Length != 1)
    {
        return false;
    }

    switch (prefix[0])
    {
        case '.':
        case '?':
        case '!':
        case ',':
            return true;
        default:
            return false;
    }
}
79+
/// <summary>
/// Builds the id-to-text lookup tables from <c>_suffixes</c> and <c>_prefixes</c> and
/// stores them in <c>_decodeSuffixes</c> and <c>_decodePrefixes</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when either source vocabulary is <c>null</c>.
/// </exception>
private void InitializeDecodeDictionaries()
{
    // Narrow the nullable fields into locals instead of suppressing the warning with '!',
    // so a missing vocabulary fails with the same error Decode reports rather than a
    // NullReferenceException.
    var suffixes = _suffixes ?? throw new InvalidOperationException("Vocabulary not loaded.");
    var prefixes = _prefixes ?? throw new InvalidOperationException("Vocabulary not loaded.");

    var decodeSuffixes = new Dictionary<long, string>(suffixes.Count);
    foreach (var kvp in suffixes)
    {
        // Indexer assignment: if two entries share an id, the later one wins.
        decodeSuffixes[kvp.Value] = kvp.Key.ToString();
    }

    var decodePrefixes = new Dictionary<long, string>(prefixes.Count);
    foreach (var kvp in prefixes)
    {
        decodePrefixes[kvp.Value] = kvp.Key.ToString();
    }

    // Assign the fields only after both tables are fully populated.
    _decodeSuffixes = decodeSuffixes;
    _decodePrefixes = decodePrefixes;
}
8097}
0 commit comments