Skip to content

Commit 4a1a494

Browse files
committed
adds POST /tokenize endpoint
1 parent 8f9b089 commit 4a1a494

File tree

34 files changed

+1184
-237
lines changed

34 files changed

+1184
-237
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import logging
2+
3+
from fastapi import APIRouter
4+
from fastapi.responses import JSONResponse
5+
from neuronpedia_inference_client.models.tokenize_post200_response import (
6+
TokenizePost200Response,
7+
)
8+
from neuronpedia_inference_client.models.tokenize_post_request import (
9+
TokenizePostRequest,
10+
)
11+
12+
from neuronpedia_inference.config import Config
13+
from neuronpedia_inference.shared import Model, with_request_lock
14+
15+
logger = logging.getLogger(__name__)
16+
17+
router = APIRouter()
18+
19+
20+
@router.post("/tokenize")
@with_request_lock()
async def tokenize(
    request: TokenizePostRequest,
):
    """Tokenize ``request.text`` with the loaded model.

    Returns a ``TokenizePost200Response`` carrying the token IDs, the string
    form of each token, and the BOS flag that was actually applied.  Rejects
    inputs longer than the configured token limit with a 400 JSON error.
    """
    model = Model.get_instance()
    config = Config.get_instance()

    # Honor an explicit per-request BOS choice; otherwise fall back to the
    # model's own default (``model.cfg.default_prepend_bos``).
    if request.prepend_bos is None:
        prepend_bos = model.cfg.default_prepend_bos
    else:
        prepend_bos = request.prepend_bos

    # truncate=False so over-long inputs are detected below rather than
    # silently clipped; [0] drops the batch dimension.
    tokens = model.to_tokens(
        request.text,
        prepend_bos=prepend_bos,
        truncate=False,
    )[0]

    # Enforce the server-wide token budget before doing any further work.
    if len(tokens) > config.TOKEN_LIMIT:
        logger.error(
            "Text too long: %s tokens, max is %s",
            len(tokens),
            config.TOKEN_LIMIT,
        )
        return JSONResponse(
            status_code=400,
            content={
                "error": f"Text too long: {len(tokens)} tokens, max is {config.TOKEN_LIMIT}"
            },
        )

    str_tokens = model.to_str_tokens(request.text, prepend_bos=prepend_bos)

    return TokenizePost200Response(
        tokens=tokens.tolist(),
        token_strings=str_tokens,  # type: ignore
        prepend_bos=prepend_bos,
    )

apps/inference/neuronpedia_inference/server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
from neuronpedia_inference.endpoints.steer.completion_chat import (
3434
router as steer_completion_chat_router,
3535
)
36+
from neuronpedia_inference.endpoints.tokenize import (
37+
router as tokenize_router,
38+
)
3639
from neuronpedia_inference.endpoints.util.sae_topk_by_decoder_cossim import (
3740
router as sae_topk_by_decoder_cossim_router,
3841
)
@@ -88,6 +91,7 @@ async def startup_event():
8891
v1_router.include_router(activation_topk_by_token_router)
8992
v1_router.include_router(sae_topk_by_decoder_cossim_router)
9093
v1_router.include_router(sae_vector_router)
94+
v1_router.include_router(tokenize_router)
9195

9296
app.include_router(v1_router)
9397

apps/inference/poetry.lock

Lines changed: 72 additions & 221 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/inference/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ sae-lens = "^5.8.1"
2828
fastapi = "^0.115.6"
2929
uvicorn = "^0.34.0"
3030
sentry-sdk = {extras = ["fastapi"], version = "^2.20.0"}
31-
neuronpedia-inference-client = {path = "../../packages/python/neuronpedia-inference-client"}
3231
nnsight = "^0.4.3"
32+
neuronpedia-inference-client = {path = "../../packages/python/neuronpedia-inference-client"}
3333

3434
[tool.poetry.group.dev.dependencies]
3535
# line-profiler = "^4.1.3"

packages/python/neuronpedia-inference-client/.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ jobs:
2828
pip install -r test-requirements.txt
2929
- name: Test with pytest
3030
run: |
31-
pytest --cov={{packageName}}
31+
pytest --cov=neuronpedia_inference_client

packages/python/neuronpedia-inference-client/.openapi-generator/FILES

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ docs/SteerCompletionChatPost200Response.md
2727
docs/SteerCompletionChatPostRequest.md
2828
docs/SteerCompletionPost200Response.md
2929
docs/SteerCompletionRequest.md
30+
docs/TokenizePost200Response.md
31+
docs/TokenizePostRequest.md
3032
docs/UtilSaeTopkByDecoderCossimPost200Response.md
3133
docs/UtilSaeTopkByDecoderCossimPost200ResponseTopkDecoderCossimFeaturesInner.md
3234
docs/UtilSaeTopkByDecoderCossimPostRequest.md
@@ -63,6 +65,8 @@ neuronpedia_inference_client/models/steer_completion_chat_post200_response.py
6365
neuronpedia_inference_client/models/steer_completion_chat_post_request.py
6466
neuronpedia_inference_client/models/steer_completion_post200_response.py
6567
neuronpedia_inference_client/models/steer_completion_request.py
68+
neuronpedia_inference_client/models/tokenize_post200_response.py
69+
neuronpedia_inference_client/models/tokenize_post_request.py
6670
neuronpedia_inference_client/models/util_sae_topk_by_decoder_cossim_post200_response.py
6771
neuronpedia_inference_client/models/util_sae_topk_by_decoder_cossim_post200_response_topk_decoder_cossim_features_inner.py
6872
neuronpedia_inference_client/models/util_sae_topk_by_decoder_cossim_post_request.py
@@ -99,6 +103,8 @@ test/test_steer_completion_chat_post200_response.py
99103
test/test_steer_completion_chat_post_request.py
100104
test/test_steer_completion_post200_response.py
101105
test/test_steer_completion_request.py
106+
test/test_tokenize_post200_response.py
107+
test/test_tokenize_post_request.py
102108
test/test_util_sae_topk_by_decoder_cossim_post200_response.py
103109
test/test_util_sae_topk_by_decoder_cossim_post200_response_topk_decoder_cossim_features_inner.py
104110
test/test_util_sae_topk_by_decoder_cossim_post_request.py
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7.11.0
1+
7.12.0

packages/python/neuronpedia-inference-client/README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ No description provided (generated by Openapi Generator https://github.com/opena
44
This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
55

66
- API version: 1.1.0
7-
- Package version: 1.1.1
8-
- Generator version: 7.11.0
7+
- Package version: 1.2.0
8+
- Generator version: 7.12.0
99
- Build package: org.openapitools.codegen.languages.PythonClientCodegen
1010

1111
## Requirements.
@@ -100,6 +100,7 @@ Class | Method | HTTP request | Description
100100
*DefaultApi* | [**activation_topk_by_token_post**](docs/DefaultApi.md#activation_topk_by_token_post) | **POST** /activation/topk-by-token | For a given prompt, get the top activating features at each token position for a single SAE.
101101
*DefaultApi* | [**steer_completion_chat_post**](docs/DefaultApi.md#steer_completion_chat_post) | **POST** /steer/completion-chat | For a given prompt, complete it by steering with the given feature or vector
102102
*DefaultApi* | [**steer_completion_post**](docs/DefaultApi.md#steer_completion_post) | **POST** /steer/completion | For a given prompt, complete it by steering with the given feature or vector
103+
*DefaultApi* | [**tokenize_post**](docs/DefaultApi.md#tokenize_post) | **POST** /tokenize | Tokenize input text for a given model
103104
*DefaultApi* | [**util_sae_topk_by_decoder_cossim_post**](docs/DefaultApi.md#util_sae_topk_by_decoder_cossim_post) | **POST** /util/sae-topk-by-decoder-cossim | Given a specific vector or SAE feature, return the top features by cosine similarity in the same SAE
104105
*DefaultApi* | [**util_sae_vector_post**](docs/DefaultApi.md#util_sae_vector_post) | **POST** /util/sae-vector | Get the raw vector for an SAE feature
105106

@@ -128,6 +129,8 @@ Class | Method | HTTP request | Description
128129
- [SteerCompletionChatPostRequest](docs/SteerCompletionChatPostRequest.md)
129130
- [SteerCompletionPost200Response](docs/SteerCompletionPost200Response.md)
130131
- [SteerCompletionRequest](docs/SteerCompletionRequest.md)
132+
- [TokenizePost200Response](docs/TokenizePost200Response.md)
133+
- [TokenizePostRequest](docs/TokenizePostRequest.md)
131134
- [UtilSaeTopkByDecoderCossimPost200Response](docs/UtilSaeTopkByDecoderCossimPost200Response.md)
132135
- [UtilSaeTopkByDecoderCossimPost200ResponseTopkDecoderCossimFeaturesInner](docs/UtilSaeTopkByDecoderCossimPost200ResponseTopkDecoderCossimFeaturesInner.md)
133136
- [UtilSaeTopkByDecoderCossimPostRequest](docs/UtilSaeTopkByDecoderCossimPostRequest.md)

packages/python/neuronpedia-inference-client/docs/DefaultApi.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Method | HTTP request | Description
99
[**activation_topk_by_token_post**](DefaultApi.md#activation_topk_by_token_post) | **POST** /activation/topk-by-token | For a given prompt, get the top activating features at each token position for a single SAE.
1010
[**steer_completion_chat_post**](DefaultApi.md#steer_completion_chat_post) | **POST** /steer/completion-chat | For a given prompt, complete it by steering with the given feature or vector
1111
[**steer_completion_post**](DefaultApi.md#steer_completion_post) | **POST** /steer/completion | For a given prompt, complete it by steering with the given feature or vector
12+
[**tokenize_post**](DefaultApi.md#tokenize_post) | **POST** /tokenize | Tokenize input text for a given model
1213
[**util_sae_topk_by_decoder_cossim_post**](DefaultApi.md#util_sae_topk_by_decoder_cossim_post) | **POST** /util/sae-topk-by-decoder-cossim | Given a specific vector or SAE feature, return the top features by cosine similarity in the same SAE
1314
[**util_sae_vector_post**](DefaultApi.md#util_sae_vector_post) | **POST** /util/sae-vector | Get the raw vector for an SAE feature
1415

@@ -408,6 +409,85 @@ Name | Type | Description | Notes
408409

409410
[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)
410411

412+
# **tokenize_post**
413+
> TokenizePost200Response tokenize_post(tokenize_post_request)
414+
415+
Tokenize input text for a given model
416+
417+
### Example
418+
419+
* Api Key Authentication (SimpleSecretAuth):
420+
421+
```python
422+
import neuronpedia_inference_client
423+
from neuronpedia_inference_client.models.tokenize_post200_response import TokenizePost200Response
424+
from neuronpedia_inference_client.models.tokenize_post_request import TokenizePostRequest
425+
from neuronpedia_inference_client.rest import ApiException
426+
from pprint import pprint
427+
428+
# Defining the host is optional and defaults to /v1
429+
# See configuration.py for a list of all supported configuration parameters.
430+
configuration = neuronpedia_inference_client.Configuration(
431+
host = "/v1"
432+
)
433+
434+
# The client must configure the authentication and authorization parameters
435+
# in accordance with the API server security policy.
436+
# Examples for each auth method are provided below, use the example that
437+
# satisfies your auth use case.
438+
439+
# Configure API key authorization: SimpleSecretAuth
440+
configuration.api_key['SimpleSecretAuth'] = os.environ["API_KEY"]
441+
442+
# Uncomment below to setup prefix (e.g. Bearer) for API key, if needed
443+
# configuration.api_key_prefix['SimpleSecretAuth'] = 'Bearer'
444+
445+
# Enter a context with an instance of the API client
446+
with neuronpedia_inference_client.ApiClient(configuration) as api_client:
447+
# Create an instance of the API class
448+
api_instance = neuronpedia_inference_client.DefaultApi(api_client)
449+
tokenize_post_request = neuronpedia_inference_client.TokenizePostRequest() # TokenizePostRequest |
450+
451+
try:
452+
# Tokenize input text for a given model
453+
api_response = api_instance.tokenize_post(tokenize_post_request)
454+
print("The response of DefaultApi->tokenize_post:\n")
455+
pprint(api_response)
456+
except Exception as e:
457+
print("Exception when calling DefaultApi->tokenize_post: %s\n" % e)
458+
```
459+
460+
461+
462+
### Parameters
463+
464+
465+
Name | Type | Description | Notes
466+
------------- | ------------- | ------------- | -------------
467+
**tokenize_post_request** | [**TokenizePostRequest**](TokenizePostRequest.md)| |
468+
469+
### Return type
470+
471+
[**TokenizePost200Response**](TokenizePost200Response.md)
472+
473+
### Authorization
474+
475+
[SimpleSecretAuth](../README.md#SimpleSecretAuth)
476+
477+
### HTTP request headers
478+
479+
- **Content-Type**: application/json
480+
- **Accept**: application/json
481+
482+
### HTTP response details
483+
484+
| Status code | Description | Response headers |
485+
|-------------|-------------|------------------|
486+
**200** | Successful tokenization | - |
487+
**401** | X-SECRET-KEY header is missing or invalid | * WWW_Authenticate - <br> |
488+
489+
[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md)
490+
411491
# **util_sae_topk_by_decoder_cossim_post**
412492
> UtilSaeTopkByDecoderCossimPost200Response util_sae_topk_by_decoder_cossim_post(util_sae_topk_by_decoder_cossim_post_request)
413493
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# TokenizePost200Response
2+
3+
4+
## Properties
5+
6+
Name | Type | Description | Notes
7+
------------ | ------------- | ------------- | -------------
8+
**tokens** | **List[int]** | Token IDs for the input text |
9+
**token_strings** | **List[str]** | String representation of each token |
10+
**prepend_bos** | **bool** | Whether beginning-of-sequence token was prepended |
11+
12+
## Example
13+
14+
```python
15+
from neuronpedia_inference_client.models.tokenize_post200_response import TokenizePost200Response
16+
17+
# TODO update the JSON string below
18+
json = "{}"
19+
# create an instance of TokenizePost200Response from a JSON string
20+
tokenize_post200_response_instance = TokenizePost200Response.from_json(json)
21+
# print the JSON string representation of the object
22+
print(TokenizePost200Response.to_json())
23+
24+
# convert the object into a dict
25+
tokenize_post200_response_dict = tokenize_post200_response_instance.to_dict()
26+
# create an instance of TokenizePost200Response from a dict
27+
tokenize_post200_response_from_dict = TokenizePost200Response.from_dict(tokenize_post200_response_dict)
28+
```
29+
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
30+
31+

0 commit comments

Comments
 (0)