export interface Section {
  title: string
  items: string[]
}

/** Classification result: labels and scores are parallel arrays for the input sequence. */
export interface ClassificationOutput {
  sequence: string
  labels: string[]
  scores: number[]
}
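
// Illustrative helper, not part of the original module: labels and scores
// are parallel arrays, so the top prediction (assuming results arrive
// sorted by descending score, as classification pipelines typically
// return them) is index 0.
export function topPrediction(out: ClassificationOutput): { label: string; score: number } {
  return { label: out.labels[0], score: out.scores[0] }
}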

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

export interface GenerationOutput {
  role: 'assistant'
  content: string
}

/** Lifecycle states a worker reports while loading a model and producing output. */
export type WorkerStatus =
  | 'initiate'
  | 'ready'
  | 'output'
  | 'loading'
  | 'error'
  | 'disposed'

/** Envelope a worker posts back to the main thread; which optional fields are set depends on status. */
export interface WorkerMessage {
  status: WorkerStatus
  progress?: number
  error?: string
  output?: any // task-dependent payload, e.g. ClassificationOutput or GenerationOutput
}
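
// Illustrative only: one way the main thread might route WorkerMessage
// events. This helper is a sketch and not part of the original module.
export function onWorkerMessage(
  worker: Worker,
  handlers: Partial<Record<WorkerStatus, (msg: WorkerMessage) => void>>
): void {
  worker.onmessage = (e: MessageEvent<WorkerMessage>) => {
    // Dispatch on the reported status; statuses without a handler are ignored.
    handlers[e.data.status]?.(e.data)
  }
}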

export interface ZeroShotWorkerInput {
  type: 'classify'
  text: string
  labels: string[]
  model: string
  dtype: QuantizationType
}

export interface TextClassificationWorkerInput {
  type: 'classify'
  text: string
  model: string
  dtype: QuantizationType
}

export interface TextGenerationWorkerInput {
  type: 'generate'
  prompt?: string
  messages?: ChatMessage[]
  hasChatTemplate: boolean
  model: string
  temperature?: number
  max_new_tokens?: number
  top_p?: number
  top_k?: number
  do_sample?: boolean
  dtype: QuantizationType
}
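
// A hedged example value (the model id and sampling settings are
// assumptions, not defaults of this module) showing a chat-templated
// generation request.
export const exampleGenerationInput: TextGenerationWorkerInput = {
  type: 'generate',
  messages: [
    { role: 'system', content: 'You are a concise assistant.' },
    { role: 'user', content: 'Summarize WebGPU in one sentence.' }
  ],
  hasChatTemplate: true,
  model: 'HuggingFaceTB/SmolLM2-135M-Instruct',
  temperature: 0.7,
  max_new_tokens: 128,
  do_sample: true,
  dtype: 'q4'
}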

// Accepted dtype strings, grouped by quantization level.
const q8Types = ['q8', 'int8', 'bnb8', 'uint8'] as const
const q4Types = ['q4', 'bnb4', 'q4f16'] as const
const fp16Types = ['fp16'] as const
const fp32Types = ['fp32'] as const

type Q8 = (typeof q8Types)[number]
type Q4 = (typeof q4Types)[number]
type Fp16 = (typeof fp16Types)[number]
type Fp32 = (typeof fp32Types)[number]

export type QuantizationType = Q8 | Q4 | Fp16 | Fp32
export const allQuantizationTypes = [
  ...q8Types,
  ...q4Types,
  ...fp16Types,
  ...fp32Types
] as const
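
// Sketch of a runtime guard (not in the original module): narrows a raw
// string, e.g. one parsed from a query parameter or config file, to
// QuantizationType.
export function isQuantizationType(value: string): value is QuantizationType {
  return (allQuantizationTypes as readonly string[]).includes(value)
}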

/** Normalized model metadata; compare the raw ModelInfoResponse below. */
export interface ModelInfo {
  id: string
  name: string
  architecture: string
  parameters: number
  likes: number
  downloads: number
  createdAt: string
  isCompatible?: boolean
  incompatibilityReason?: string
  supportedQuantizations: QuantizationType[]
  baseId?: string
  readme?: string
  hasChatTemplate: boolean
}

/** Raw model metadata in the shape returned by the Hugging Face Hub API, plus app-added compatibility fields. */
export interface ModelInfoResponse {
  id: string
  createdAt: string
  config?: {
    architectures: string[]
    model_type: string
    tokenizer_config?: {
      chat_template?: string
    }
  }
  lastModified: string
  pipeline_tag: string
  tags: string[]
  cardData?: {
    base_model: string
  }
  baseId?: string
  transformersInfo: {
    pipeline_tag: string
    auto_model: string
    processor: string
  }
  safetensors?: {
    parameters: {
      BF16?: number
      F16?: number
      F32?: number
      total?: number
    }
  }
  siblings?: {
    rfilename: string
  }[]
  modelId?: string
  isCompatible: boolean
  incompatibilityReason?: string
  supportedQuantizations: QuantizationType[]
  likes: number
  downloads: number
  readme?: string
}
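
// Sketch, under assumptions about the field mapping (name from modelId,
// parameter count from the safetensors totals, chat support from the
// tokenizer config), of converting the raw hub response into ModelInfo.
export function toModelInfo(res: ModelInfoResponse): ModelInfo {
  return {
    id: res.id,
    name: res.modelId ?? res.id,
    architecture: res.config?.architectures?.[0] ?? res.config?.model_type ?? 'unknown',
    parameters: res.safetensors?.parameters.total ?? 0,
    likes: res.likes,
    downloads: res.downloads,
    createdAt: res.createdAt,
    isCompatible: res.isCompatible,
    incompatibilityReason: res.incompatibilityReason,
    supportedQuantizations: res.supportedQuantizations,
    baseId: res.baseId ?? res.cardData?.base_model,
    readme: res.readme,
    hasChatTemplate: Boolean(res.config?.tokenizer_config?.chat_template)
  }
}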