Sonnyjim committed
Commit 359f819 · Parent: 76942b4

Added pyproject.toml file, updated some dependencies
Dockerfile CHANGED
@@ -1,5 +1,5 @@
 # Stage 1: Build dependencies and download models
-FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm AS builder
+FROM public.ecr.aws/docker/library/python:3.11.13-slim-bookworm AS builder

 # Install system dependencies.
 RUN apt-get update && apt-get install -y \
@@ -22,15 +22,13 @@ ENV CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"

 COPY requirements_aws.txt .

-RUN pip uninstall -y typing_extensions \
-    && pip install --no-cache-dir --target=/install typing_extensions==4.12.2 \
-    && pip install --no-cache-dir --target=/install torch==2.7.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu \
+RUN pip install --no-cache-dir --target=/install torch==2.7.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu \
     && pip install --no-cache-dir --target=/install -r requirements_aws.txt

 RUN rm requirements_aws.txt

 # Stage 2: Final runtime image
-FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm
+FROM public.ecr.aws/docker/library/python:3.11.13-slim-bookworm

 # Install system dependencies.
 RUN apt-get update \
@@ -62,7 +60,6 @@ ENV HOME=/home/user \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_SERVER_PORT=7860 \
     GRADIO_THEME=huggingface \
-    TLDEXTRACT_CACHE=$HOME/app/tld/.tld_set_snapshot \
     SYSTEM=spaces

 # Set the working directory to the user's home directory
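
The builder stage installs everything with pip --target=/install rather than into the image's own site-packages, so the runtime stage only needs that one directory and the build toolchain stays out of the final image. The COPY that moves it across falls outside the hunks shown here, so the following is a minimal sketch of the usual pattern, with the destination path an assumption:

    # Sketch only: the real COPY line is not shown in this diff, and the
    # destination below is an assumed site-packages path for Python 3.11.
    COPY --from=builder /install /usr/local/lib/python3.11/site-packages
    # Alternative: leave /install in place and put it on the import path.
    # ENV PYTHONPATH=/install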
README.md CHANGED
@@ -3,7 +3,7 @@ title: Large language model topic modelling
 emoji: 📝
 colorFrom: purple
 colorTo: yellow
-sdk: gradio
+sdk: docker
 app_file: app.py
 pinned: true
 license: agpl-3.0
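
Switching sdk from gradio to docker tells Hugging Face Spaces to build and run the repository's own Dockerfile instead of the managed Gradio runtime, which is what makes the Dockerfile changes above take effect. Docker Spaces can also declare the container's listening port via app_port (7860 is the default). A minimal sketch of the resulting front matter; the app_port line is an optional addition here, chosen to match GRADIO_SERVER_PORT in the Dockerfile:

    ---
    title: Large language model topic modelling
    emoji: 📝
    sdk: docker
    app_port: 7860  # optional; 7860 is also the Docker Space default
    app_file: app.py
    ---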
pyproject.toml ADDED
@@ -0,0 +1,5 @@
+[project]
+name = "Large language model topic modelling"
+version = "0.1.0"
+description = "Topic model open text data files with a large language model."
+requires-python = ">=3.10"
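
One caveat: PEP 621 project names must be valid distribution names (ASCII letters, digits, and ., _, - only, per PEP 508), so the spaces in "Large language model topic modelling" will make stricter build backends reject the file if it is ever used to build a package. A sketch of a valid variant; the dashed name is just an illustrative choice:

    [project]
    # Hypothetical rename: any name matching the [A-Za-z0-9._-] rules would do.
    name = "large-language-model-topic-modelling"
    version = "0.1.0"
    description = "Topic model open text data files with a large language model."
    requires-python = ">=3.10"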
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 pandas==2.3.1
-gradio==5.42.0
+gradio==5.44.1
 transformers==4.55.2
 spaces==0.40.0
 boto3==1.40.11
@@ -8,19 +8,19 @@ openpyxl==3.1.5
 markdown==3.7
 tabulate==0.9.0
 lxml==5.3.0
-google-genai==1.30.0
+google-genai==1.32.0
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.13.0
 python-dotenv==1.1.0
 # Torch and Llama CPP Python
 # GPU
-torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124 # Latest compatible with CUDA 12.4
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl # Specify exact llama_cpp for CUDA compatibility on Hugging Face
+#torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124 # Latest compatible with CUDA 12.4
+#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl # Specify exact llama_cpp for CUDA compatibility on Hugging Face
 #
 # CPU only (for e.g. Hugging Face CPU instances):
-# torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cpu
-# llama-cpp-python==0.3.16 # should work on local Linux systems, but will be extremely slow on Hugging Face and will most likely time out
+torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cpu
+llama-cpp-python==0.3.16 # should work on local Linux systems, but will be extremely slow on Hugging Face and will most likely time out
 # For Hugging Face, can only specify the latest non-CUDA wheel for Python 3.10; currently only llama-cpp-python v0.3.2, which can only be used with Gemma 2 2b for local inference
 #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp310-cp310-linux_x86_64.whl
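With the CUDA pins commented out, pip now resolves torch through the PyTorch CPU index named by the per-requirement --extra-index-url option. A quick way to confirm the CPU wheel was picked up after installing (the exact version string is indicative, not guaranteed):

    pip install -r requirements.txt
    python -c "import torch; print(torch.__version__)"          # e.g. 2.7.1+cpu
    python -c "import torch; print(torch.cuda.is_available())"  # False on the CPU build
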
requirements_aws.txt CHANGED
@@ -1,5 +1,5 @@
 pandas==2.3.1
-gradio==5.42.0
+gradio==5.44.1
 transformers==4.55.2
 spaces==0.40.0
 boto3==1.40.11
@@ -8,10 +8,10 @@ openpyxl==3.1.5
 markdown==3.7
 tabulate==0.9.0
 lxml==5.3.0
-google-genai==1.30.0
+google-genai==1.32.0
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.13.0
 python-dotenv==1.1.0
-torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124 # Latest compatible with CUDA 12.4
+torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cpu
 llama-cpp-python==0.3.16
requirements_cpu_win.txt CHANGED
@@ -1,5 +1,5 @@
 pandas==2.3.1
-gradio==5.42.0
+gradio==5.44.1
 transformers==4.55.2
 spaces==0.40.0
 boto3==1.40.11
@@ -8,11 +8,11 @@ openpyxl==3.1.5
 markdown==3.7
 tabulate==0.9.0
 lxml==5.3.0
-google-genai==1.30.0
+google-genai==1.32.0
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.13.0
 python-dotenv==1.1.0
 # The following llama-cpp-python wheel is the latest with a Windows build; it will only work with Gemma 2
-torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cpu # Latest compatible with CUDA 12.4
+torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cpu
 https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl
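
Because the llama-cpp-python requirement is a direct wheel URL tagged cp311/win_amd64, this file only installs on 64-bit Windows under Python 3.11; on any other interpreter pip will refuse the wheel as unsupported. A minimal pre-flight check:

    python --version   # must report 3.11.x for the cp311 wheel above
    pip install -r requirements_cpu_win.txt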
requirements_gpu.txt CHANGED
@@ -1,5 +1,5 @@
 pandas==2.3.1
-gradio==5.44.0
+gradio==5.44.1
 transformers==4.55.2
 spaces==0.40.0
 boto3==1.40.11
@@ -8,7 +8,7 @@ openpyxl==3.1.5
 markdown==3.7
 tabulate==0.9.0
 lxml==5.3.0
-google-genai==1.30.0
+google-genai==1.32.0
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.13.0