IP-Adapter for Image Style Transfer
IP-Adapter (Image Prompt Adapter) transfers style, appearance, or identity from a reference image into generation without requiring model fine-tuning. Works as a plug-in: reference image → visual embeddings → attention control.
Installation and Basic Usage
import io

import numpy as np
import torch
from diffusers import StableDiffusionXLPipeline
from PIL import Image
# Load the SDXL base pipeline in half precision and move it to the GPU.
# NOTE(review): this runs at import time and downloads multi-GB weights on
# first use — confirm that is acceptable for the hosting environment.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
).to("cuda")
# Load IP-Adapter SDXL weights as a plug-in on top of the base pipeline;
# no fine-tuning of the underlying model is required.
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter_sdxl.bin"
)
def generate_with_style_reference(
    style_image: bytes,
    prompt: str,
    ip_adapter_scale: float = 0.6,  # 0.0 = no influence, 1.0 = maximum
    steps: int = 30
) -> bytes:
    """Generate an image from *prompt*, styled after a reference image.

    Args:
        style_image: Encoded reference image (any format PIL can open).
        prompt: Text prompt describing the desired content.
        ip_adapter_scale: Strength of the reference image's influence.
        steps: Number of diffusion inference steps.

    Returns:
        The generated image encoded as PNG bytes.
    """
    reference = Image.open(io.BytesIO(style_image)).convert("RGB")
    # The scale is set on the pipeline itself and applies to this call.
    pipe.set_ip_adapter_scale(ip_adapter_scale)
    output = pipe(
        prompt=prompt,
        ip_adapter_image=reference,
        num_inference_steps=steps,
        guidance_scale=7.5,
    )
    generated = output.images[0]
    png_buffer = io.BytesIO()
    generated.save(png_buffer, format="PNG")
    return png_buffer.getvalue()
IP-Adapter Face ID — Preserving Face Identity
from insightface.app import FaceAnalysis
import cv2
# IP-Adapter FaceID preserves face recognition in different styles.
# FaceID conditions on a face-recognition embedding rather than a CLIP image
# encoding, so the image encoder is disabled (image_encoder_folder=None).
pipe.load_ip_adapter(
    "h94/IP-Adapter-FaceID",
    subfolder=None,
    weight_name="ip-adapter-faceid_sdxl.bin",
    image_encoder_folder=None
)
# InsightFace detector/embedder; det_size is the detection input resolution.
app = FaceAnalysis(name="buffalo_l", providers=["CUDAExecutionProvider"])
app.prepare(ctx_id=0, det_size=(640, 640))
def generate_face_in_style(
    face_photo: bytes,
    prompt: str,
    scale: float = 0.7
) -> bytes:
    """Generate an image in the style of *prompt* preserving the face identity
    found in *face_photo*.

    Args:
        face_photo: Encoded photo containing at least one face.
        prompt: Text prompt describing the target style/scene.
        scale: IP-Adapter FaceID influence strength.

    Returns:
        The generated image encoded as PNG bytes.

    Raises:
        ValueError: If the photo bytes cannot be decoded or no face is found.
    """
    img_np = cv2.imdecode(np.frombuffer(face_photo, np.uint8), cv2.IMREAD_COLOR)
    # cv2.imdecode returns None (not an exception) on undecodable input;
    # fail fast with a clear error instead of an opaque detector failure.
    if img_np is None:
        raise ValueError("Could not decode face_photo bytes as an image")
    faces = app.get(img_np)
    if not faces:
        raise ValueError("Face not detected in reference image")
    # Use the first detected face's normalized recognition embedding.
    face_embedding = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    pipe.set_ip_adapter_scale(scale)
    # NOTE(review): recent diffusers versions expect FaceID embeddings via
    # ip_adapter_image_embeds=[...] rather than ip_adapter_image — verify
    # against the pinned diffusers release before shipping.
    result = pipe(
        prompt=prompt,
        ip_adapter_image=face_embedding,
        num_inference_steps=30
    ).images[0]
    buf = io.BytesIO()
    result.save(buf, format="PNG")
    return buf.getvalue()
Combining IP-Adapter with ControlNet
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
# Canny-edge ControlNet for SDXL: constrains the generated image's structure.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16
)
# Rebuild the pipeline with ControlNet attached (replaces the plain pipeline).
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=torch.float16
).to("cuda")
# IP-Adapter loads on top of the ControlNet pipeline the same way as before;
# 0.5 keeps the style influence moderate so ControlNet structure dominates.
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
pipe.set_ip_adapter_scale(0.5)
# Generation: structure from ControlNet + style from IP-Adapter.
# NOTE(review): `prompt`, `canny_control_image`, and `style_reference` are
# assumed to be defined by the caller/reader — they are not created in this
# snippet.
result = pipe(
    prompt=prompt,
    image=canny_control_image,  # Structure from Canny edge map
    ip_adapter_image=style_reference,  # Style from reference image
    controlnet_conditioning_scale=0.8,  # How strictly structure is enforced
    num_inference_steps=30
).images[0]
Usage Scenarios
| Scenario | IP-Adapter scale | ControlNet |
|---|---|---|
| Artistic style transfer | 0.7–0.9 | No |
| Avatar generation with face | 0.6–0.8 (FaceID) | Optional OpenPose |
| Product in brand style | 0.5–0.7 | Canny for shape |
| Character in different scenes | 0.6–0.8 | No |
IP-Adapter is 5–10 times faster than LoRA/DreamBooth training for tasks requiring style reference without precise detail reproduction. Integration timeline — 1–2 days.







