Can it load on CPU mode?

#4
by FenixInDarkSolo - opened

I tried to load the model in CPU mode by removing .cuda() from AutoModel.from_pretrained(), but it failed. I don't care about the inference speed. Is there any way to set the device parameter on the model object? Thank you.

in README.md
CPU部署( use CPU )
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
change to
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()

No, it didn't work. The problem comes from the cpm_kernels library.
It loads the CUDA setup, but I didn't find any way to avoid it.

It worked for me. I am able to deploy the model on 48 GB of RAM and 2 vCPUs, without a GPU. It took at least 2-3 minutes for a simple question (less than 10 tokens), though.

Knowledge Engineering Group (KEG) & Data Mining at Tsinghua University org

The code in the 'dev' branch might be what you are looking for — it won't load cpm_kernels if you don't have a GPU.
Or you can try "THUDM/chatglm-6b-int4", the new feature has been merged into 'main' already:
model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).cpu().float()

I'm seeing a similar issue (trying to run model on CPU from Google colab), issue seems to be from the cpm_kernels package

Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a revision is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.

OSError Traceback (most recent call last)
in <cell line: 8>()
6 # CPU model
7 # model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4",trust_remote_code=True).cpu().float()
----> 8 model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).cpu().float()
9
10 model = model.eval()

17 frames
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
460 class_ref = config.auto_map[cls.name]
461 module_file, class_name = class_ref.split(".")
--> 462 model_class = get_class_from_dynamic_module(
463 pretrained_model_name_or_path, module_file + ".py", class_name, **hub_kwargs, **kwargs
464 )

/usr/local/lib/python3.10/dist-packages/transformers/dynamic_module_utils.py in get_class_from_dynamic_module(pretrained_model_name_or_path, module_file, class_name, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only, **kwargs)
386 ```"""
387 # And lastly we get the class inside our newly created module
--> 388 final_module = get_cached_module_file(
389 pretrained_model_name_or_path,
390 module_file,

/usr/local/lib/python3.10/dist-packages/transformers/dynamic_module_utils.py in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only)
297 for module_needed in modules_needed:
298 if not (submodule_path / module_needed).exists():
--> 299 get_cached_module_file(
300 pretrained_model_name_or_path,
301 f"{module_needed}.py",

/usr/local/lib/python3.10/dist-packages/transformers/dynamic_module_utils.py in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, use_auth_token, revision, local_files_only)
267
268 # Check we have all the requirements in our environment
--> 269 modules_needed = check_imports(resolved_module_file)
270
271 # Now we move the module inside our cached dynamic modules.

/usr/local/lib/python3.10/dist-packages/transformers/dynamic_module_utils.py in check_imports(filename)
132 for imp in imports:
133 try:
--> 134 importlib.import_module(imp)
135 except ImportError:
136 missing_packages.append(imp)

/usr/lib/python3.10/importlib/init.py in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
127
128

/usr/lib/python3.10/importlib/_bootstrap.py in _gcd_import(name, package, level)

/usr/lib/python3.10/importlib/_bootstrap.py in find_and_load(name, import)

/usr/lib/python3.10/importlib/_bootstrap.py in find_and_load_unlocked(name, import)

/usr/lib/python3.10/importlib/_bootstrap.py in _load_unlocked(spec)

/usr/lib/python3.10/importlib/_bootstrap_external.py in exec_module(self, module)

/usr/lib/python3.10/importlib/_bootstrap.py in _call_with_frames_removed(f, *args, **kwds)

/usr/local/lib/python3.10/dist-packages/cpm_kernels/init.py in
----> 1 from . import library
2 from .kernels import *

/usr/local/lib/python3.10/dist-packages/cpm_kernels/library/init.py in
1 from . import nvrtc
----> 2 from . import cuda
3 from . import cudart
4 from . import cublaslt

/usr/local/lib/python3.10/dist-packages/cpm_kernels/library/cuda.py in
7 cuda = Lib.from_lib("cuda", ctypes.WinDLL("nvcuda.dll"))
8 else:
----> 9 cuda = Lib("cuda")
10 CUresult = ctypes.c_int
11

/usr/local/lib/python3.10/dist-packages/cpm_kernels/library/base.py in init(self, name)
53 self.__lib_path = lib_path
54 if lib_path is not None:
---> 55 self.__lib = ctypes.cdll.LoadLibrary(lib_path)
56 else:
57 self.__lib = None

/usr/lib/python3.10/ctypes/init.py in LoadLibrary(self, name)
450
451 def LoadLibrary(self, name):
--> 452 return self._dlltype(name)
453
454 class_getitem = classmethod(_types.GenericAlias)

/usr/lib/python3.10/ctypes/init.py in init(self, name, mode, handle, use_errno, use_last_error, winmode)
372
373 if handle is None:
--> 374 self._handle = _dlopen(self._name, mode)
375 else:
376 self._handle = handle

OSError: libcuda.so.1: cannot open shared object file: No such file or directory

Got : return F.layer_norm(
File "E:\oobabooga_windows\oobabooga_windows\installer_files\env\lib\site-packages\torch\nn\functional.py", line 2515, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: mixed dtype (CPU): expect input to have scalar type of BFloat16

Sign up or log in to comment