Skip to content

Commit e6e68ca

Browse files
Merge pull request #126 from kerthcet/cleanup/modelclaims-change
Prepare for v0.0.5
2 parents 71a9652 + 68bbf77 commit e6e68ca

File tree

6 files changed

+59
-12
lines changed

6 files changed

+59
-12
lines changed

docs/examples/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,4 @@ By default, we use [vLLM](https://github.com/vllm-project/vllm) as the inference
4343

4444
### Speculative Decoding with vLLM
4545

46-
> Not supported yet because llama.cpp doesn't support speculative decoding in the server side, see https://github.com/ggerganov/llama.cpp/issues/5877.
47-
4846
[Speculative Decoding](https://arxiv.org/abs/2211.17192) can improve inference performance efficiently, see [example](./speculative-decoding/vllm/) here.

docs/examples/speculative-decoding/vllm/playground.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,11 @@ spec:
1313
backendConfig:
1414
args:
1515
- --use-v2-block-manager
16-
- --num_speculative_tokens 5
17-
- -tp 1
16+
- --num_speculative_tokens
17+
- "5"
18+
- -tp
19+
- "1"
20+
resources:
21+
limits:
22+
cpu: 8
23+
memory: "16Gi"

llmaz/model_loader/model_hub/huggingface.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
ModelHub,
2727
)
2828
from llmaz.util.logger import Logger
29+
from llmaz.model_loader.model_hub.util import get_folder_total_size
2930

3031
from typing import Optional
3132

@@ -50,14 +51,18 @@ def load_model(
5051
local_dir=MODEL_LOCAL_DIR,
5152
revision=revision,
5253
)
54+
file_size = os.path.getsize(MODEL_LOCAL_DIR + filename) / (1024**3)
55+
Logger.info(
56+
f"The total size of {MODEL_LOCAL_DIR + filename} is {file_size: .2f} GB"
57+
)
5358
return
5459

60+
local_dir = os.path.join(
61+
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
62+
)
63+
5564
# # TODO: Should we verify the download is finished?
5665
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
57-
local_dir = os.path.join(
58-
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
59-
)
60-
6166
futures = []
6267
for file in list_repo_files(repo_id=model_id):
6368
# TODO: support version management; right now we don't distinguish between versions.
@@ -71,6 +76,9 @@ def load_model(
7176
).add_done_callback(handle_completion)
7277
)
7378

79+
total_size = get_folder_total_size(local_dir)
80+
Logger.info(f"The total size of {local_dir} is {total_size: .2f} GB")
81+
7482

7583
def handle_completion(future):
7684
filename = future.result()

llmaz/model_loader/model_hub/modelscope.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
ModelHub,
2828
)
2929
from llmaz.util.logger import Logger
30+
from llmaz.model_loader.model_hub.util import get_folder_total_size
3031

3132

3233
class ModelScope(ModelHub):
@@ -43,11 +44,12 @@ def load_model(
4344
f"Start to download, model_id: {model_id}, filename: {filename}, revision: {revision}"
4445
)
4546

47+
local_dir = os.path.join(
48+
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
49+
)
50+
4651
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
4752
futures = []
48-
local_dir = os.path.join(
49-
MODEL_LOCAL_DIR, f"models--{model_id.replace('/','--')}"
50-
)
5153
futures.append(
5254
executor.submit(
5355
snapshot_download,
@@ -57,6 +59,9 @@ def load_model(
5759
).add_done_callback(handle_completion)
5860
)
5961

62+
total_size = get_folder_total_size(local_dir)
63+
Logger.info(f"The total size of {local_dir} is {total_size:.2f} GB")
64+
6065

6166
def handle_completion(future):
6267
filename = future.result()

llmaz/model_loader/model_hub/util.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
import os
18+
19+
20+
def get_folder_total_size(folder_path: str) -> float:
    """Return the combined size of every file under *folder_path*.

    The directory tree is traversed recursively with ``os.walk`` and the
    byte sizes of all regular entries are summed.

    Args:
        folder_path: Root directory to measure.

    Returns:
        The total size divided by ``1024**3`` (i.e. expressed in GiB).
        ``0.0`` is returned when the directory is empty or does not exist,
        because ``os.walk`` then yields nothing.
    """
    total_size = 0

    for dirpath, _, filenames in os.walk(folder_path):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            try:
                total_size += os.path.getsize(file_path)
            except OSError:
                # The entry is a broken symlink, or it was removed/renamed
                # between being listed and being measured. Checking
                # os.path.exists() first would still leave that window open,
                # so skip the entry instead of letting the error propagate.
                continue

    return total_size / (1024**3)

pkg/defaults.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ limitations under the License.
1717
package pkg
1818

1919
const (
20-
LOADER_IMAGE = "inftyai/model-loader:v0.0.7"
20+
LOADER_IMAGE = "inftyai/model-loader:v0.0.8"
2121
)

0 commit comments

Comments
 (0)