use AutoProcessor with use_fast=False since there's a bug with use_fa… #125

Workflow file for this run

.github/workflows/test.yml at 2d20000

	# Workflow display name.
	name: Python Tests on M1 Mac

	# Triggers: pushes to main and pull requests targeting main.
	# Each event owns its own `branches` filter, so both must nest under `on`.
	on:
	  push:
	    branches: [main]
	  pull_request:
	    branches: [main]

	jobs:
	  # Runs the inference engine test module on a macos-14 runner.
	  unit_test:
	    runs-on: macos-14
	    steps:
	      - uses: actions/checkout@v2

	      - name: Set up Python
	        uses: actions/setup-python@v2
	        with:
	          # Quoted so the version is read as a string, not a float.
	          python-version: '3.12'

	      - name: Cache huggingface hub models
	        uses: actions/cache@v3
	        with:
	          path: ~/.cache/huggingface/hub
	          # NOTE(review): GitHub documents hashFiles() as matching only files
	          # inside GITHUB_WORKSPACE, so this pattern under ~ likely hashes to an
	          # empty string and the key is effectively constant -- confirm.
	          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/*/') }}-${{ github.job }}

	      - name: Install dependencies
	        run: |
	          python3 -m pip install --upgrade pip
	          pip install .

	      - name: Run tests
	        run: |
	          # Check if cached files are present (best effort: a missing cache must not fail the step)
	          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/*/ || true

	          # Run unit tests
	          METAL_XCODE=1 python3 -m exo.inference.test_inference_engine

	  # Starts two local instances with mirrored listen/broadcast ports and checks
	  # that each one logs "Connected to peer".
	  discovery_integration_test:
	    runs-on: macos-latest
	    steps:
	      - uses: actions/checkout@v2

	      - name: Set up Python
	        uses: actions/setup-python@v2
	        with:
	          python-version: '3.x'

	      - name: Install dependencies
	        run: |
	          python3 -m pip install --upgrade pip
	          pip install .

	      - name: Run discovery integration test
	        run: |
	          # Print the captured output of both instances
	          dump_logs() {
	            echo "Output of first instance:"
	            cat output1.log
	            echo "Output of second instance:"
	            cat output2.log
	          }

	          # Start first instance
	          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
	          PID1=$!

	          # Start second instance
	          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
	          PID2=$!

	          # Wait for discovery
	          sleep 10

	          # Stop both instances. The default step shell exits on the first failing
	          # command, so a failed kill (an instance already exited) is handled here
	          # to keep the failure but still show the logs.
	          if ! kill $PID1 $PID2; then
	            echo "Test failed: an instance was no longer running when stopped"
	            dump_logs
	            exit 1
	          fi

	          # Check outputs
	          if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
	            echo "Test passed: Both instances discovered each other"
	            exit 0
	          else
	            echo "Test failed: Devices did not discover each other"
	            dump_logs
	            exit 1
	          fi

	  # Two-instance chat completion test against the local ChatGPT-compatible API.
	  # Currently disabled: the script exits 0 on its first line (see TODO below).
	  chatgpt_api_integration_test:
	    runs-on: macos-latest
	    steps:
	      - uses: actions/checkout@v2

	      - name: Set up Python
	        uses: actions/setup-python@v2
	        with:
	          python-version: '3.x'

	      - name: Cache huggingface hub models
	        uses: actions/cache@v3
	        with:
	          path: ~/.cache/huggingface/hub
	          # NOTE(review): hashFiles() is documented as workspace-only, so this
	          # pattern under ~ likely yields an empty hash -- confirm. The
	          # restore-keys prefix below still allows a prefix-match restore.
	          key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/*/') }}-${{ github.job }}
	          restore-keys: |
	            ${{ runner.os }}-huggingface-hub-

	      - name: Cache tinygrad downloaded models
	        uses: actions/cache@v3
	        with:
	          path: ~/Library/Caches/tinygrad/downloads
	          # NOTE(review): same workspace-only hashFiles() caveat as above -- confirm.
	          key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/*/') }}-${{ github.job }}
	          restore-keys: |
	            ${{ runner.os }}-tinygrad-downloads-

	      - name: Install dependencies
	        run: |
	          python3 -m pip install --upgrade pip
	          pip install .

	      - name: Run chatgpt api integration test
	        run: |
	          exit 0 # TODO
	          # Everything below is unreachable until the exit above is removed.

	          # Check if cached files are present
	          ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/*/ || true

	          # Start first instance
	          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
	          PID1=$!

	          # Start second instance
	          DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --inference-engine mlx --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
	          PID2=$!

	          # Wait for discovery
	          sleep 10

	          # Function to check if processes are still running
	          # (kill -0 sends no signal; it only tests that the PID exists)
	          check_processes() {
	            if ! kill -0 $PID1 2>/dev/null; then
	              echo "First instance (PID $PID1) died unexpectedly. Log output:"
	              cat output1.log
	              exit 1
	            fi
	            if ! kill -0 $PID2 2>/dev/null; then
	              echo "Second instance (PID $PID2) died unexpectedly. Log output:"
	              cat output2.log
	              exit 1
	            fi
	          }

	          # Check processes before proceeding
	          check_processes

	          # first one to load the model
	          curl -s http://localhost:8000/v1/chat/completions \
	            -H "Content-Type: application/json" \
	            -d '{
	              "model": "llama-3-8b",
	              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
	              "temperature": 0.7
	            }'

	          # Check processes after model load
	          check_processes

	          response_1=$(curl -s http://localhost:8000/v1/chat/completions \
	            -H "Content-Type: application/json" \
	            -d '{
	              "model": "llama-3-8b",
	              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
	              "temperature": 0.7
	            }')
	          echo "Response 1: $response_1"

	          # Check processes after first response
	          check_processes

	          response_2=$(curl -s http://localhost:8000/v1/chat/completions \
	            -H "Content-Type: application/json" \
	            -d '{
	              "model": "llama-3-8b",
	              "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
	              "temperature": 0.7
	            }')
	          echo "Response 2: $response_2"

	          # Check processes after second response
	          check_processes

	          # Stop both instances
	          kill $PID1 $PID2

	          echo ""
	          # Fail if either response lacks the expected answer
	          if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
	            echo "Test failed: Response does not contain 'Michael Jackson'"
	            echo "Response 1: $response_1"
	            echo ""
	            echo "Response 2: $response_2"
	            echo "Output of first instance:"
	            cat output1.log
	            echo "Output of second instance:"
	            cat output2.log
	            exit 1
	          else
	            echo "Test passed: Response from both nodes contains 'Michael Jackson'"
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

use AutoProcessor with use_fast=False since there's a bug with use_fa… #125

Workflow file

use AutoProcessor with use_fast=False since there's a bug with use_fa… #125

Jobs

Run details

Workflow file for this run