-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
FlowStateLLMPrompt.py
442 lines (372 loc) · 21.5 KB
/
FlowStateLLMPrompt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
# Project: FlowState LLM Prompt & LLM Output
# Description: Generate an image prompt using an LLM.
# Version: 1.0.0
# Author: Johnathan Chivington
# Contact: johnathan@flowstateengineering.com | youtube.com/@flowstateeng
##
# SYSTEM STATUS
##
print(f' - Loaded LLM Prompt & Prompt Output nodes.')
##
# FS IMPORTS
##
from .FS_Types import *
from .FS_Constants import *
from .FS_Assets import *
##
# LIBRARIES
##
import os, sys, time, copy, itertools
import importlib
import warnings
import comfy.utils
import comfy.sd
try:
Llama = importlib.import_module('llama_cpp_cuda').Llama
except ImportError:
Llama = importlib.import_module('llama_cpp').Llama
sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy"))
warnings.filterwarnings('ignore', message='clean_up_tokenization_spaces')
warnings.filterwarnings("ignore", message="1Torch was not compiled with flash attention")
warnings.filterwarnings("ignore", category=FutureWarning)
##
# NODES
##
class FlowStatePromptLLM:
CATEGORY = 'FlowState/LLM'
DESCRIPTION = (
'Generate an image prompt using an LLM. If using multiple parameters, will need to be '
'paired with a sampler that can accomodate them, like the FlowState Unified Sampler.'
)
FUNCTION = 'generate_prompts'
RETURN_TYPES = FS_LLM_OUT
RETURN_NAMES = (
'positive_conditioning',
'negative_conditioning',
'positive_prompt',
'negative_prompt',
)
OUTPUT_TOOLTIPS = (
'Selected positive conditioning(s) used to guide the diffusion model.',
'Original negative conditioning(s) used to guide the diffusion model.',
'Selected positive text prompt(s).',
'Original negative text prompt(s).',
)
@classmethod
def INPUT_TYPES(cls):
available_models = []
if os.path.isdir(GLOBAL_MODELS_DIR):
gguf_files = [file for file in os.listdir(GLOBAL_MODELS_DIR) if file.endswith('.gguf')]
available_models.extend(gguf_files)
else:
available_models.append('no available models')
expanded_model_list = []
for sc in range(1, len(available_models) + 1):
combinations = itertools.combinations(available_models, sc)
expanded_model_list.extend([', '.join(comb) for comb in combinations])
clip_preset_vals = ';\n\n'.join([f' - {preset}: {vals}' for preset, vals in CLIP_PRESETS.items()])
clip_preset_tooltip = f'CLIP Attention Preset values as follows (q, k, v, out):\n\n{clip_preset_vals}'
return {
'required': {
'clip': CLIP_IN,
'positive_text_prompt': STRING_PROMPT_POSITIVE,
'negative_text_prompt': STRING_PROMPT_NEGATIVE,
'prompt_type': LLM_PROMPT_TYPE,
'seed': SEED,
'llm_model': (expanded_model_list, {'tooltip': 'All combinations of your available models.'}, ),
'seed_str_list': SEED_LIST,
'max_tokens': MAX_TOKENS_LIST,
'clip_preset': (['none', 'default', 'clarity_boost', 'even_flow', 'subtle_focus', 'sharp_detail'], {'tooltip': clip_preset_tooltip}, ),
'q': Q_LIST,
'k': K_LIST,
'v': V_LIST,
'out': OUT_LIST,
}
}
@classmethod
def format_num(self, num, num_type, alt):
num_to_use = num
try:
num_to_use = num_type(num_to_use)
except:
num_to_use = alt
return num_to_use
@classmethod
def multiply_clip_attn(self, clip, q, k, v, out):
multiplied = clip.clone()
state_dict = multiplied.patcher.model_state_dict()
for key in state_dict:
if key.endswith("self_attn.q_proj.weight") or key.endswith("self_attn.q_proj.bias"):
multiplied.add_patches({key: (None,)}, 0.0, q)
if key.endswith("self_attn.k_proj.weight") or key.endswith("self_attn.k_proj.bias"):
multiplied.add_patches({key: (None,)}, 0.0, k)
if key.endswith("self_attn.v_proj.weight") or key.endswith("self_attn.v_proj.bias"):
multiplied.add_patches({key: (None,)}, 0.0, v)
if key.endswith("self_attn.out_proj.weight") or key.endswith("self_attn.out_proj.bias"):
multiplied.add_patches({key: (None,)}, 0.0, out)
return multiplied
@classmethod
def encode_text(self, clip, text):
tokens = clip.tokenize(text)
output = clip.encode_from_tokens(tokens, return_pooled=True, return_dict=True)
cond = output.pop("cond")
return ([cond, output], )
@classmethod
def generate_msgs(self, instructions):
return [
{'role': 'system', 'content': f'You are a helpful assistant.'},
{'role': 'user',
'content': f'An image generation prompt is a single paragraph summary to describe the subject and '
f'style of an image. It includes a description of the kind of image, the subject of '
f'the image, and some description of the image medium and style in the form of short '
f'keyword.\n\nCreate an image generation prompt for the subject \'a creepy creature '
f'shadow in the dark in a dimly lit tunnel\' in the style \'digital art illustration '
f'with intricate details\'.'
},
{'role': 'assistant',
'content': f'Image Description: A digitally crafted illustration portrays a chilling scene within '
f'a dimly lit, cavernous tunnel. The dominant subject of the image is a mysterious '
f'creature, its form subtly discernible only as a menacing shadow on the walls of the '
f'tunnel. Its elongated silhouette suggests a serpentine creature with sharp '
f'protrusions and an ominous aura. The creature\'s eyes, a pair of glowing orbs, appear '
f'eerily human-like yet alien. The tunnel is riddled with intricate details that '
f'enhance the eerie atmosphere: dust particles floating lazily in the feeble light, '
f'ancient and crumbling stone, water droplets sparkling on the damp walls, and a '
f'hauntingly beautiful, bioluminescent fungi growing in the shadows. The dimly lit '
f'environment is highlighted by strategically placed light sources that create '
f'dramatic chiaroscuro effects, casting an unsettling and atmospheric glow on the '
f'scene. Digital Art Illustration with Intricate Details (Dark, Atmospheric, '
f'Suspenseful)'
},
{'role': 'user', 'content': 'Now compile the description and style into a single paragraph summary'},
{'role': 'assistant',
'content': f'Digital art illustration featuring a dark, atmospheric, and suspenseful scene within '
f'a dimly lit, cavernous tunnel. The subject of the image is a mysterious creature, '
f'depicted only as a menacing shadow on the walls, with elongated silhouette and sharp '
f'protrusions. The creature\'s eyes, a pair of glowing orbs, appear eerily human-like '
f'yet alien. The tunnel is adorned with intricate details, such as dust particles, '
f'ancient crumbling stone, water droplets, and a hauntingly beautiful bioluminescent '
f'fungi growing in the shadows. Dramatic chiaroscuro effects are created through '
f'strategically placed light sources, casting an unsettling and atmospheric glow on '
f'the scene.'
},
{'role': 'user', 'content': f'create a detailed summary without the title or style'},
{'role': 'assistant',
'content': f'A dimly lit, cavernous tunnel is the setting for this chilling digital illustration. '
f'A mysterious creature lurks in the shadows, its elongated silhouette suggestive of a '
f'serpentine creature with sharp protrusions and an ominous aura. The creature\'s eyes, '
f'a pair of glowing orbs, appear eerily human-like yet alien. The tunnel is riddled '
f'with intricate details that enhance the eerie atmosphere: dust particles floating '
f'lazily in the feeble light, ancient and crumbling stone, water droplets sparkling on '
f'the damp walls, and a hauntingly beautiful, bioluminescent fungi growing in the '
f'shadows. Dramatic chiaroscuro effects are created through strategically placed light '
f'sources, casting an unsettling and atmospheric glow on the scene.'
},
{'role': 'user',
'content': f'Generate a prompt from \'magical garden, sitting on a blue_bench, Disney Princess in '
f'pink_dress, blonde_hair, portrait, Pixar, Disney style, photorealistic, high '
f'details, detailed skin, pores, freckles\''
},
{'role': 'assistant',
'content': f'In a captivating, Pixar-inspired, Disney-style, photorealistic portrait, a magical '
f'garden unfolds as a breathtaking backdrop. The subject of the image is a Disney '
f'Princess, dressed elegantly in a pink gown with flowing fabric, sitting serenely on '
f'a blue bench. The princess boasts long, blonde hair and a radiant smile. The garden '
f'is meticulously detailed, with vibrant, enchanted flora and fauna, as well as '
f'whimsical details like sparkling fairy lights and a picturesque waterfall. The '
f'princess is highlighted against the lush, detailed foliage, with a focus on the '
f'high-definition details of her porcelain skin, visible freckles, and the intricacies '
f'of her elegant gown. The image is rendered in the captivating, photorealistic style '
f'that exemplifies both the Disney and Pixar brands, capturing the princess\'s timeless '
f'beauty and the magic of her enchanting surroundings.'
},
{'role': 'user', 'content': instructions},
]
@classmethod
def generate_prompt(self, clip, positive_text_prompt, negative_text_prompt, prompt_type, seed, llm_model, max_tokens, q, k, v, out):
param_string = (
f'llm_prompt_type:{prompt_type},'
f'llm_seed:{seed},'
f'llm_model:{llm_model},'
f'llm_max_tokens:{max_tokens},'
f'q:{q},'
f'k:{k},'
f'v:{v},'
f'out:{out}'
)
if prompt_type == 'original':
param_string = param_string.replace(f'llm_seed:{seed},llm_model:{llm_model},llm_max_tokens:{max_tokens},', '')
multiplied_clip = self.multiply_clip_attn(clip, q, k, v, out)
neg_prompt = negative_text_prompt.replace('\n\n', '. ').replace('\n', '. ')
pos_prompt = positive_text_prompt.replace('\n\n', '. ').replace('\n', '. ')
out_prompt = None
if prompt_type == 'original':
print(f' - Using original prompt.\n')
positive_conditioning = self.encode_text(multiplied_clip, pos_prompt)
negative_conditioning = self.encode_text(multiplied_clip, neg_prompt)
tagged_pos = f'{FS_LLM_PROMPT_TAG}-{param_string}-----{pos_prompt}'
out_prompt = (positive_conditioning, negative_conditioning, tagged_pos, neg_prompt, )
if prompt_type == 'extra_crispy' and not llm_model.endswith('.gguf'):
print(f' - Not a valid GGUF model.\n')
positive_conditioning = self.encode_text(multiplied_clip, pos_prompt)
negative_conditioning = self.encode_text(multiplied_clip, neg_prompt)
tagged_pos = f'{FS_LLM_PROMPT_TAG}-{param_string}-----NOT A VALID GGUF MODEL - {pos_prompt}'
out_prompt = (positive_conditioning, negative_conditioning, tagged_pos, neg_prompt, )
if prompt_type == 'extra_crispy' and llm_model.endswith('.gguf'):
print(f' - Using LLM generated prompt.\n')
model_path = os.path.join(GLOBAL_MODELS_DIR, llm_model)
loaded_model = Llama(model_path=model_path, n_gpu_layers=-1, seed=seed, verbose=False, n_ctx=2048,)
generate_kwargs = {'max_tokens': max_tokens, 'temperature': 1.0, 'top_p': 0.9, 'top_k': 50, 'repeat_penalty': 1.2}
llm_messages = self.generate_msgs(f'Generate a prompt from {pos_prompt}')
llm_prompt = loaded_model.create_chat_completion(llm_messages, **generate_kwargs)
llm_prompt_cleaned = llm_prompt['choices'][0]['message']['content'].strip()
llm_conditioning = self.encode_text(multiplied_clip, llm_prompt_cleaned)
negative_conditioning = self.encode_text(multiplied_clip, neg_prompt)
tagged_pos_llm = f'{FS_LLM_PROMPT_TAG}-{param_string}-----{llm_prompt_cleaned}'.replace('\n\n', '. ').replace('\n', '. ')
out_prompt = (llm_conditioning, negative_conditioning, tagged_pos_llm, neg_prompt, )
return out_prompt
@classmethod
def extract_prompt_parts(self, prompts_out, clip, pos_prompts, neg_prompts, run_prompt, seed_to_use, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use):
prompts_out_copy = copy.deepcopy(prompts_out)
pos_count = len(pos_prompts)
neg_count = len(neg_prompts)
equal_counts = pos_count == neg_count
different_pos = False if all(prompt == pos_prompts[0] for prompt in pos_prompts) else True
different_neg = False if all(prompt == neg_prompts[0] for prompt in neg_prompts) else True
one_neg_to_many_pos = neg_count == 1 and pos_count > 1
one_pos_to_many_neg = pos_count == 1 and neg_count > 1
one_to_one = equal_counts and different_pos and different_neg
same_neg_to_many_pos = (equal_counts and different_pos and not different_neg) or one_neg_to_many_pos
same_pos_to_many_neg = (equal_counts and different_neg and not different_pos) or one_pos_to_many_neg
if one_to_one or same_neg_to_many_pos:
for p, pos_prompt in enumerate(pos_prompts):
neg_prompt = neg_prompts[p] if one_to_one else neg_prompts[0]
prompt_parts = self.generate_prompt(
clip, pos_prompt, neg_prompt, run_prompt, seed_to_use, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use
)
prompts_out_copy[0] += [prompt_parts[0]]
prompts_out_copy[1] += [prompt_parts[1]]
prompts_out_copy[2] += [prompt_parts[2]]
prompts_out_copy[3] += [prompt_parts[3]]
elif same_pos_to_many_neg:
for n, neg_prompt in enumerate(neg_prompts):
pos_prompt = pos_prompts[n]
prompt_parts = self.generate_prompt(
clip, pos_prompt, neg_prompt, run_prompt, seed_to_use, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use
)
prompts_out_copy[0] += [prompt_parts[0]]
prompts_out_copy[1] += [prompt_parts[1]]
prompts_out_copy[2] += [prompt_parts[2]]
prompts_out_copy[3] += [prompt_parts[3]]
else:
for p, pos_prompt in enumerate(pos_prompts):
for n, neg_prompt in enumerate(neg_prompts):
prompt_parts = self.generate_prompt(
clip, pos_prompt, neg_prompt, run_prompt, seed_to_use, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use
)
prompts_out_copy[0] += [prompt_parts[0]]
prompts_out_copy[1] += [prompt_parts[1]]
prompts_out_copy[2] += [prompt_parts[2]]
prompts_out_copy[3] += [prompt_parts[3]]
return prompts_out_copy
def generate_prompts(self, clip, positive_text_prompt, negative_text_prompt, prompt_type, seed, llm_model, seed_str_list, max_tokens, clip_preset, q, k, v, out):
print(f'\n\nFlowState LLM Prompt')
start_time = time.time()
positive_prompts = [prompt.strip() for prompt in positive_text_prompt.split('-----')]
negative_prompts = [prompt.strip() for prompt in negative_text_prompt.split('-----')]
seed_list = seed_str_list.replace(' ', '').split(',')
model_list = llm_model.replace(' ', '').split(',')
token_list = max_tokens.replace(' ', '').split(',')
q_list = q.replace(' ', '').split(',')
k_list = k.replace(' ', '').split(',')
v_list = v.replace(' ', '').split(',')
out_list = out.replace(' ', '').split(',')
if clip_preset != 'none':
print(f' USING PRESET: {clip_preset}')
q_list = [CLIP_PRESETS[clip_preset][0]]
k_list = [CLIP_PRESETS[clip_preset][1]]
v_list = [CLIP_PRESETS[clip_preset][2]]
out_list = [CLIP_PRESETS[clip_preset][3]]
prompts_out = [[], [], [], []]
for token_num, run_token in enumerate(token_list):
token_to_use = self.format_num(run_token, int, seed)
for q_num, run_q in enumerate(q_list):
q_to_use = self.format_num(run_q, float, 1.0)
for k_num, run_k in enumerate(k_list):
k_to_use = self.format_num(run_k, float, 1.0)
for v_num, run_v in enumerate(v_list):
v_to_use = self.format_num(run_v, float, 1.0)
for out_num, run_out in enumerate(out_list):
out_to_use = self.format_num(run_out, float, 1.0)
for model_num, run_model in enumerate(model_list):
both_prompt_types = prompt_type == 'both'
prompt_type_list = ['extra_crispy', 'original'] if both_prompt_types else [prompt_type]
for prompt_num, run_prompt in enumerate(prompt_type_list):
if run_prompt == 'original':
prompts_out = self.extract_prompt_parts(
prompts_out, clip, positive_prompts, negative_prompts, run_prompt, 0, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use
)
else:
for seed_num, run_seed in enumerate(seed_list):
seed_to_use = self.format_num(run_seed, int, seed)
prompts_out = self.extract_prompt_parts(
prompts_out, clip, positive_prompts, negative_prompts, run_prompt, seed_to_use, run_model,
token_to_use, q_to_use, k_to_use, v_to_use, out_to_use
)
prompt_gen_duration = time.time() - start_time
prompt_gen_mins = int(prompt_gen_duration // 60)
prompt_gen_secs = int(prompt_gen_duration - prompt_gen_mins * 60)
print(
f'\n\n\nFlowState LLM Prompt - Prompt generation complete.'
f'\n - Generation Time: {prompt_gen_mins}m {prompt_gen_secs}s'
)
if len(prompts_out[0]) == 1:
return (prompts_out[0][0], prompts_out[1][0], prompts_out[2][0], prompts_out[3][0], )
return (prompts_out[0], prompts_out[1], prompts_out[2], prompts_out[3], )
class FlowStatePromptLLMOutput:
CATEGORY = 'FlowState/LLM'
DESCRIPTION = 'Show selected text output.'
FUNCTION = 'show_prompt'
RETURN_TYPES = STRING_OUT
RETURN_NAMES = ('selected_prompt',)
OUTPUT_TOOLTIPS = ('Generated prompt from the LLM.',)
OUTPUT_NODE = True
@classmethod
def INPUT_TYPES(cls):
return {
'required': {
'text': ANY,
},
'hidden': {
'unique_id': 'UNIQUE_ID', 'extra_pnginfo': 'EXTRA_PNGINFO',
},
}
def show_prompt(self, text, unique_id=None, extra_pnginfo=None):
print(f' - Displaying output text.')
in_text = copy.deepcopy(text) if isinstance(text, list) else [copy.deepcopy(text)]
out_text = ''
separator = '-' * 25
for p, prompt in enumerate(in_text):
if FS_LLM_PROMPT_TAG in prompt:
build_text = prompt.replace(f'{FS_LLM_PROMPT_TAG}-', '')
out_split = build_text.split('-----')
param_str = out_split[0].replace(',', ', ').replace('llm_', '').replace(':', ': ')
build_text = f'PROMPT {p + 1}:\n{param_str}\n{separator}\n{out_split[1]}\n\n\n'
out_text += build_text
else:
out_text += f'PROMPT {p + 1}:\n{separator}\n{prompt}\n\n\n'
if unique_id is not None and extra_pnginfo is not None and len(extra_pnginfo) > 0:
workflow = None
if 'workflow' in extra_pnginfo:
workflow = extra_pnginfo['workflow']
node = None
if workflow and 'nodes' in workflow:
node = next((x for x in workflow['nodes'] if str(x['id']) == unique_id), None)
if node:
node['widgets_values'] = [str(out_text)]
return {'ui': {'text': (str(out_text),)}}