File tree

2 files changed

+190
-16
lines changed

2 files changed

+190
-16
lines changed
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ import 'schema.dart';
1919
/// Response for Count Tokens
2020
final class CountTokensResponse {
2121
/// Constructor
22-
CountTokensResponse(this.totalTokens, {this.totalBillableCharacters});
22+
CountTokensResponse(this.totalTokens,
23+
{this.totalBillableCharacters, this.promptTokensDetails});
2324

2425
/// The number of tokens that the `model` tokenizes the `prompt` into.
2526
///
@@ -30,6 +31,9 @@ final class CountTokensResponse {
3031
///
3132
/// Always non-negative.
3233
final int? totalBillableCharacters;
34+
35+
/// List of modalities that were processed in the request input.
36+
final List<ModalityTokenCount>? promptTokensDetails;
3337
}
3438

3539
/// Response from the model; supports multiple candidates.
@@ -128,11 +132,12 @@ final class PromptFeedback {
128132
/// Metadata on the generation request's token usage.
129133
final class UsageMetadata {
130134
/// Constructor
131-
UsageMetadata._({
132-
this.promptTokenCount,
133-
this.candidatesTokenCount,
134-
this.totalTokenCount,
135-
});
135+
UsageMetadata._(
136+
{this.promptTokenCount,
137+
this.candidatesTokenCount,
138+
this.totalTokenCount,
139+
this.promptTokensDetails,
140+
this.candidatesTokensDetails});
136141

137142
/// Number of tokens in the prompt.
138143
final int? promptTokenCount;
@@ -142,6 +147,12 @@ final class UsageMetadata {
142147

143148
/// Total token count for the generation request (prompt + candidates).
144149
final int? totalTokenCount;
150+
151+
/// List of modalities that were processed in the request input.
152+
final List<ModalityTokenCount>? promptTokensDetails;
153+
154+
/// List of modalities that were returned in the response.
155+
final List<ModalityTokenCount>? candidatesTokensDetails;
145156
}
146157

147158
/// Response candidate generated from a [GenerativeModel].
@@ -481,6 +492,62 @@ enum FinishReason {
481492
String toString() => name;
482493
}
483494

495+
/// Token usage attributed to a single content modality.
final class ModalityTokenCount {
  /// Creates a count of [tokenCount] tokens for the given [modality].
  ModalityTokenCount(this.modality, this.tokenCount);

  /// The kind of content (for example text or image) the tokens belong to.
  final ContentModality modality;

  /// How many tokens were counted for [modality].
  final int tokenCount;
}
506+
507+
/// Content part modality.
///
/// Every value carries the string used for it in JSON. [toJson] emits that
/// string and parsing accepts exactly the same spelling, so a value written
/// with [toJson] can always be read back.
enum ContentModality {
  /// Unspecified modality.
  unspecified('MODALITY_UNSPECIFIED'),

  /// Plain text.
  text('TEXT'),

  /// Image.
  image('IMAGE'),

  /// Video.
  video('VIDEO'),

  /// Audio.
  audio('AUDIO'),

  /// Document, e.g. PDF.
  document('DOCUMENT');

  const ContentModality(this._jsonString);

  /// Maps a decoded JSON value to the matching [ContentModality].
  ///
  /// Throws a [FormatException] when [jsonObject] is not one of the known
  /// modality strings.
  static ContentModality _parseValue(Object jsonObject) {
    // The literals below must stay identical (including case) to the strings
    // passed to the enum constructor above; otherwise parsing and [toJson]
    // disagree and valid payloads are rejected.
    return switch (jsonObject) {
      'MODALITY_UNSPECIFIED' => ContentModality.unspecified,
      'TEXT' => ContentModality.text,
      'IMAGE' => ContentModality.image,
      'VIDEO' => ContentModality.video,
      'AUDIO' => ContentModality.audio,
      'DOCUMENT' => ContentModality.document,
      _ =>
        throw FormatException('Unhandled ContentModality format', jsonObject),
    };
  }

  /// The string that represents this value in JSON.
  final String _jsonString;

  @override
  String toString() => name;

  /// Convert to json format.
  Object toJson() => _jsonString;
}
550+
484551
/// Safety setting, affecting the safety-blocking behavior.
485552
///
486553
/// Passing a safety setting for a category changes the allowed probability that
@@ -696,16 +763,28 @@ GenerateContentResponse parseGenerateContentResponse(Object jsonObject) {
696763
/// Parses the decoded JSON [jsonObject] into a [CountTokensResponse].
///
/// `totalTokens` is required and must be an integer.
/// `totalBillableCharacters` and `promptTokensDetails` are optional and are
/// left `null` when absent or of an unexpected type.
///
/// Throws the error built by `parseError` when the payload contains an
/// `error` entry, and the error built by `unhandledFormat` when the payload
/// is not a map with an integer `totalTokens`.
CountTokensResponse parseCountTokensResponse(Object jsonObject) {
  if (jsonObject case {'error': final Object error}) throw parseError(error);

  // Match the required field with a pattern instead of an `as int` cast so a
  // missing or mistyped `totalTokens` is reported through `unhandledFormat`
  // rather than escaping as a cast error.
  if (jsonObject case {'totalTokens': final int totalTokens}) {
    final totalBillableCharacters = switch (jsonObject) {
      {'totalBillableCharacters': final int totalBillableCharacters} =>
        totalBillableCharacters,
      _ => null,
    };
    final promptTokensDetails = switch (jsonObject) {
      {'promptTokensDetails': final List<Object?> promptTokensDetails} =>
        promptTokensDetails.map(_parseModalityTokenCount).toList(),
      _ => null,
    };

    return CountTokensResponse(
      totalTokens,
      totalBillableCharacters: totalBillableCharacters,
      promptTokensDetails: promptTokensDetails,
    );
  }

  throw unhandledFormat('CountTokensResponse', jsonObject);
}
710789

711790
Candidate _parseCandidate(Object? jsonObject) {
@@ -777,10 +856,30 @@ UsageMetadata _parseUsageMetadata(Object jsonObject) {
777856
{'totalTokenCount': final int totalTokenCount} => totalTokenCount,
778857
_ => null,
779858
};
859+
final promptTokensDetails = switch (jsonObject) {
860+
{'promptTokensDetails': final List<Object?> promptTokensDetails} =>
861+
promptTokensDetails.map(_parseModalityTokenCount).toList(),
862+
_ => null,
863+
};
864+
final candidatesTokensDetails = switch (jsonObject) {
865+
{'candidatesTokensDetails': final List<Object?> candidatesTokensDetails} =>
866+
candidatesTokensDetails.map(_parseModalityTokenCount).toList(),
867+
_ => null,
868+
};
780869
return UsageMetadata._(
781870
promptTokenCount: promptTokenCount,
782871
candidatesTokenCount: candidatesTokenCount,
783-
totalTokenCount: totalTokenCount);
872+
totalTokenCount: totalTokenCount,
873+
promptTokensDetails: promptTokensDetails,
874+
candidatesTokensDetails: candidatesTokensDetails);
875+
}
876+
877+
/// Parses one entry of a token-details list into a [ModalityTokenCount].
///
/// Throws the error built by `unhandledFormat` when [jsonObject] is not a map
/// holding a non-null `modality` and an integer `tokenCount`. An unrecognised
/// modality value is reported by the [ContentModality] parser itself.
ModalityTokenCount _parseModalityTokenCount(Object? jsonObject) {
  // Destructure both required fields in one pattern so that a malformed entry
  // fails the same way as a non-map entry, instead of surfacing a cast error.
  if (jsonObject
      case {
        'modality': final Object modality,
        'tokenCount': final int tokenCount,
      }) {
    return ModalityTokenCount(
      ContentModality._parseValue(modality),
      tokenCount,
    );
  }
  throw unhandledFormat('ModalityTokenCount', jsonObject);
}
785884

786885
SafetyRating _parseSafetyRating(Object? jsonObject) {
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,81 @@ void main() {
654654
);
655655
});
656656

657+
test('response including usage metadata', () async {
658+
const response = '''
659+
{
660+
"candidates": [{
661+
"content": {
662+
"role": "model",
663+
"parts": [{
664+
"text": "Here is a description of the image:"
665+
}]
666+
},
667+
"finishReason": "STOP"
668+
}],
669+
"usageMetadata": {
670+
"promptTokenCount": 1837,
671+
"candidatesTokenCount": 76,
672+
"totalTokenCount": 1913,
673+
"promptTokensDetails": [{
674+
"modality": "TEXT",
675+
"tokenCount": 76
676+
}, {
677+
"modality": "IMAGE",
678+
"tokenCount": 1806
679+
}],
680+
"candidatesTokensDetails": [{
681+
"modality": "TEXT",
682+
"tokenCount": 76
683+
}]
684+
}
685+
}
686+
''';
687+
final decoded = jsonDecode(response) as Object;
688+
final generateContentResponse = parseGenerateContentResponse(decoded);
689+
expect(
690+
generateContentResponse.text, 'Here is a description of the image:');
691+
expect(generateContentResponse.usageMetadata?.totalTokenCount, 1913);
692+
expect(
693+
generateContentResponse
694+
.usageMetadata?.promptTokensDetails?[1].modality,
695+
ContentModality.image);
696+
expect(
697+
generateContentResponse
698+
.usageMetadata?.promptTokensDetails?[1].tokenCount,
699+
1806);
700+
expect(
701+
generateContentResponse
702+
.usageMetadata?.candidatesTokensDetails?.first.modality,
703+
ContentModality.text);
704+
expect(
705+
generateContentResponse
706+
.usageMetadata?.candidatesTokensDetails?.first.tokenCount,
707+
76);
708+
});
709+
710+
test('countTokens with modality fields returned', () async {
711+
const response = '''
712+
{
713+
"totalTokens": 1837,
714+
"totalBillableCharacters": 117,
715+
"promptTokensDetails": [{
716+
"modality": "IMAGE",
717+
"tokenCount": 1806
718+
}, {
719+
"modality": "TEXT",
720+
"tokenCount": 31
721+
}]
722+
}
723+
''';
724+
final decoded = jsonDecode(response) as Object;
725+
final countTokensResponse = parseCountTokensResponse(decoded);
726+
expect(countTokensResponse.totalTokens, 1837);
727+
expect(countTokensResponse.promptTokensDetails?.first.modality,
728+
ContentModality.image);
729+
expect(countTokensResponse.promptTokensDetails?.first.tokenCount, 1806);
730+
});
731+
657732
test('text getter joins content', () async {
658733
const response = '''
659734
{

0 commit comments

Comments
 (0)