Spaces:

Gonalb
/

aceup

Sleeping

App Files Files Community

gonalbz committed 17 days ago

Commit

62151d3

1 Parent(s): e924c11

init

Browse files

Files changed (30) hide show

.dockerignore +20 -0
.gitignore +5 -0
Dockerfile +29 -0
README.md +140 -5
app/__init__.py +0 -0
app/adapters/__init__.py +0 -0
app/adapters/openai.py +59 -0
app/configurations.py +10 -0
app/domain.py +8 -0
app/errors.py +14 -0
app/frontend.py +86 -0
app/llm_schema.py +6 -0
app/main.py +104 -0
app/mappers.py +18 -0
app/ports/__init__.py +1 -0
app/ports/llm.py +22 -0
app/prompts.py +11 -0
app/repositories.py +26 -0
app/schemas.py +15 -0
app/services.py +104 -0
assessment.md +55 -0
poetry.lock +0 -0
pyproject.toml +20 -0
tests/__init__.py +0 -0
tests/adapters/__init__.py +0 -0
tests/adapters/mock_data.py +70 -0
tests/adapters/test_openai.py +38 -0
tests/api/test_transcript_analysis_api.py +110 -0
tests/frontend/test_gradio_frontend.py +59 -0
tests/services/test_transcript_analysis_service.py +116 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,20 @@

+.git
+.gitignore
+.DS_Store
+__pycache__/
+*.py[cod]
+*.pyo
+.coverage
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+htmlcov/
+.env
+.env.*
+.venv/
+venv/
+tests/

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.env
+.venv/
+.pytest_cache/
+__pycache__/
+*.py[cod]

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+# Build on the official python:3.12-slim image.
+FROM python:3.12-slim
+# Runtime/tooling flags: skip .pyc files, unbuffered output, no pip cache,
+# the Poetry version to install, non-interactive Poetry, and a virtualenv
+# created inside the project directory.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    POETRY_VERSION=2.4.1 \
+    POETRY_NO_INTERACTION=1 \
+    POETRY_VIRTUALENVS_IN_PROJECT=true
+# Create a user with UID 1000 and run every following step as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Expose the user's local bin directory and the in-project virtualenv on PATH
+# so `poetry` and `uvicorn` resolve without activating anything.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:/home/user/app/.venv/bin:$PATH
+WORKDIR $HOME/app
+# Install the Poetry release pinned by POETRY_VERSION above.
+RUN pip install --upgrade pip && \
+    pip install "poetry==$POETRY_VERSION"
+# Copy only the dependency manifests before installing, ahead of the
+# application source copied further down.
+COPY --chown=user pyproject.toml poetry.lock ./
+# Install the main dependency group only; the project package itself is not installed.
+RUN poetry install --only main --no-root
+COPY --chown=user app ./app
+# Same port as `app_port` in the README front matter and the --port flag below.
+EXPOSE 7860
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,11 +1,146 @@
 ---
-title: Aceup
-emoji: 📉
 colorFrom: blue
 colorTo: gray
 sdk: docker
-pinned: false
-short_description: aceup assessment
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Aceup Transcript Analysis
 colorFrom: blue
 colorTo: gray
 sdk: docker
+app_port: 7860
+base_path: /ui
+short_description: Transcript analysis API and Gradio interface.
 ---
+# ml-tech-assessment
+## Environment Setup
+### Using Conda (Recommended)
+1. Install Conda if you haven't already:
+   - Download and install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) or [Anaconda](https://www.anaconda.com/products/distribution)
+2. Create and activate a new conda environment:
+   ```bash
+   conda create -n ml-assessment python=3.12
+   conda activate ml-assessment
+   ```
+## Installing Poetry and Dependencies
+1. Install Poetry using pip:
+   ```bash
+   pip install poetry
+   ```
+2. Install project dependencies:
+   ```bash
+   poetry install
+   ```
+## Environment Variables
+1. Create a `.env` file in the root directory of the project
+2. Copy the contents of the provided `.env` file into your local `.env` file
+Required values:
+```bash
+OPENAI_API_KEY=<your-openai-api-key>
+OPENAI_MODEL=gpt-4o-2024-08-06
+```
+## Running Tests
+To run the tests, make sure you have:
+1. Activated your virtual environment
+2. Installed all dependencies using Poetry
+3. Created and populated the `.env` file
+Then run:
+```bash
+pytest
+```
+For more detailed test output:
+```bash
+pytest -v
+```
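+For a test coverage report (requires the `pytest-cov` plugin, which is not among the dependencies declared in `pyproject.toml`):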
+```bash
+pytest --cov
+```
+## Running the API
+Start the FastAPI application with:
+```bash
+poetry run uvicorn app.main:app --reload
+```
+If Poetry is not installed globally but dependencies are already installed in the local virtual environment, run:
+```bash
+./.venv/bin/uvicorn app.main:app --reload
+```
+Swagger documentation is available at:
+```text
+http://127.0.0.1:8000/docs
+```
+The Gradio frontend is available at:
+```text
+http://127.0.0.1:8000/ui
+```
+Analyze one transcript:
+```bash
+curl -G "http://127.0.0.1:8000/analyses" \
+  --data-urlencode "transcript=Discuss the launch plan and assign next steps."
+```
+Retrieve a stored analysis:
+```bash
+curl "http://127.0.0.1:8000/analyses/<analysis-id>"
+```
+Analyze multiple transcripts concurrently:
+```bash
+curl -X POST "http://127.0.0.1:8000/analyses/batch" \
+  -H "Content-Type: application/json" \
+  -d '{"transcripts":["Discuss launch risks.","Review onboarding plan."]}'
+```
+Analysis results are stored in memory, so they reset when the API process restarts.
+## Hugging Face Spaces Deployment
+This repository is configured as a Docker Space. The container serves FastAPI and the mounted Gradio UI through Uvicorn on port `7860`, and the Space opens directly at `/ui`.
+Set `OPENAI_API_KEY` as a Hugging Face Space Secret. Optionally set `OPENAI_MODEL` as a Space Variable if you want to override the default model.
+Build and run the container locally:
+```bash
+docker build -t aceup-transcript-analysis .
+docker run --rm -p 7860:7860 --env-file .env aceup-transcript-analysis
+```
+Then open:
+```text
+http://127.0.0.1:7860/ui
+```
+## OpenAI Adapter Integration Test
+The live OpenAI adapter test is skipped by default so local test runs do not require network access or credentials. To run it explicitly:
+```bash
+RUN_OPENAI_INTEGRATION_TESTS=1 poetry run pytest tests/adapters/test_openai.py
+```

app/__init__.py ADDED Viewed

File without changes

app/adapters/__init__.py ADDED Viewed

File without changes

app/adapters/openai.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import openai
+import pydantic
+from app import ports
+class OpenAIAdapter(ports.LLm):
+    def __init__(self, api_key: str, model: str) -> None:
+        self._model = model
+        self._client = openai.OpenAI(api_key=api_key)
+        self._aclient = openai.AsyncOpenAI(api_key=api_key)
+    def run_completion(self, system_prompt: str, user_prompt: str, dto: type[pydantic.BaseModel]) -> pydantic.BaseModel:
+        """
+        Executes a completion request using the OpenAI API with the provided prompts and response format.
+        Args:
+            system_prompt (str): The system's introductory message for the chat.
+            user_prompt (str): The user input for which a response is needed.
+            dto (Type[pydantic.BaseModel]): A Pydantic model class used to define the structure of the API response.
+        Returns:
+            pydantic.BaseModel: An instance of the provided DTO class populated with the API response data.
+            more info: https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat
+        """
+        completion = self._client.beta.chat.completions.parse(
+            model=self._model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            response_format=dto
+        )
+        return completion.choices[0].message.parsed
+    async def run_completion_async(self, system_prompt: str, user_prompt: str,
+                                   dto: type[pydantic.BaseModel]) -> pydantic.BaseModel:
+        """
+        Executes a completion request using the OpenAI API with the provided prompts and response format.
+        Args:
+            system_prompt (str): The system's introductory message for the chat.
+            user_prompt (str): The user input for which a response is needed.
+            dto (Type[pydantic.BaseModel]): A Pydantic model class used to define the structure of the API response.
+        Returns:
+         pydantic.BaseModel: An instance of the provided DTO class populated with the API response data.
+         more info: https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat
+         """
+        completion = await self._aclient.beta.chat.completions.parse(
+            model=self._model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            response_format=dto
+        )
+        return completion.choices[0].message.parsed

app/configurations.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import pydantic_settings
+# Application settings read through pydantic-settings; model_config below
+# additionally points it at a UTF-8 encoded ".env" file.
+class EnvConfigs(pydantic_settings.BaseSettings):
+    model_config =pydantic_settings.SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
+    # No default is declared, so a value must be supplied.
+    OPENAI_API_KEY: str
+    # Model name used when OPENAI_MODEL is not supplied.
+    OPENAI_MODEL: str = "gpt-4o-2024-08-06"

app/domain.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from dataclasses import dataclass
+@dataclass(frozen=True)
+class TranscriptAnalysis:
+    """Immutable domain record for one analyzed transcript."""
+    # Identifier under which the analysis is stored and looked up.
+    id: str
+    # Summary text of the transcript.
+    summary: str
+    # Recommended next steps, kept as a tuple so the frozen record holds no mutable list.
+    action_items: tuple[str, ...]

app/errors.py ADDED Viewed

	@@ -0,0 +1,14 @@

+class TranscriptAnalysisError(Exception):
+    pass
+class InvalidTranscriptError(TranscriptAnalysisError):
+    pass
+class AnalysisNotFoundError(TranscriptAnalysisError):
+    pass
+class LLMCompletionError(TranscriptAnalysisError):
+    pass

app/frontend.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from collections.abc import Callable
+import gradio as gr
+from app.domain import TranscriptAnalysis
+from app.errors import AnalysisNotFoundError, InvalidTranscriptError, LLMCompletionError
+from app.services import TranscriptAnalysisService
+ServiceFactory = Callable[[], TranscriptAnalysisService]
+FrontendResult = tuple[str, str, str]
+def build_gradio_app(service_factory: ServiceFactory) -> gr.Blocks:
+    """Build the Gradio UI with an "Analyze" tab and a "Lookup" tab.
+    Args:
+        service_factory: Zero-argument callable returning the service to use;
+            it is passed to the click handlers, which call it on every click.
+    Returns:
+        The assembled ``gr.Blocks`` object.
+    """
+    with gr.Blocks(title="Transcript Analysis") as frontend:
+        gr.Markdown("# Transcript Analysis")
+        # Tab 1: submit a transcript and show the resulting id, summary and next steps.
+        with gr.Tab("Analyze"):
+            transcript_input = gr.Textbox(
+                label="Transcript",
+                lines=12,
+                placeholder="Paste a transcript here...",
+            )
+            analyze_button = gr.Button("Analyze", variant="primary")
+            analysis_id_output = gr.Textbox(label="Analysis ID", interactive=False)
+            summary_output = gr.Textbox(label="Summary", lines=5, interactive=False)
+            action_items_output = gr.Textbox(
+                label="Suggested Next Steps",
+                lines=6,
+                interactive=False,
+            )
+            # The lambda closes over service_factory so the handler takes only the textbox value.
+            analyze_button.click(
+                fn=lambda transcript: analyze_transcript(transcript, service_factory),
+                inputs=transcript_input,
+                outputs=[analysis_id_output, summary_output, action_items_output],
+            )
+        # Tab 2: fetch a previously stored analysis by its id.
+        with gr.Tab("Lookup"):
+            analysis_id_input = gr.Textbox(label="Analysis ID")
+            lookup_button = gr.Button("Lookup", variant="primary")
+            lookup_id_output = gr.Textbox(label="Analysis ID", interactive=False)
+            lookup_summary_output = gr.Textbox(label="Summary", lines=5, interactive=False)
+            lookup_action_items_output = gr.Textbox(
+                label="Suggested Next Steps",
+                lines=6,
+                interactive=False,
+            )
+            lookup_button.click(
+                fn=lambda analysis_id: lookup_analysis(analysis_id, service_factory),
+                inputs=analysis_id_input,
+                outputs=[lookup_id_output, lookup_summary_output, lookup_action_items_output],
+            )
+    return frontend
+def analyze_transcript(transcript: str, service_factory: ServiceFactory) -> FrontendResult:
+    try:
+        analysis = service_factory().analyze(transcript)
+    except (InvalidTranscriptError, LLMCompletionError) as exc:
+        raise gr.Error(str(exc)) from exc
+    return format_analysis(analysis)
+def lookup_analysis(analysis_id: str, service_factory: ServiceFactory) -> FrontendResult:
+    try:
+        analysis = service_factory().get(analysis_id.strip())
+    except (AnalysisNotFoundError, InvalidTranscriptError, LLMCompletionError) as exc:
+        raise gr.Error(str(exc)) from exc
+    return format_analysis(analysis)
+def format_analysis(analysis: TranscriptAnalysis) -> FrontendResult:
+    return analysis.id, analysis.summary, format_action_items(analysis.action_items)
+def format_action_items(action_items: tuple[str, ...]) -> str:
+    if not action_items:
+        return "No suggested next steps returned."
+    return "\n".join(f"{index}. {action_item}" for index, action_item in enumerate(action_items, start=1))

app/llm_schema.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from pydantic import BaseModel
+# DTO describing the structured output requested from the LLM.
+# Documented with comments rather than a docstring so the model's generated
+# schema is left exactly as it was.
+class LLMTranscriptAnalysisResponse(BaseModel):
+    # Summary text produced for the transcript.
+    summary: str
+    # Recommended next actions, one string per item.
+    action_items: list[str]

app/main.py ADDED Viewed

	@@ -0,0 +1,104 @@

+from functools import lru_cache
+from typing import Annotated
+from fastapi import Depends, FastAPI, Query, Request
+from fastapi.responses import JSONResponse, RedirectResponse
+import gradio as gr
+from app.adapters.openai import OpenAIAdapter
+from app.configurations import EnvConfigs
+from app.mappers import to_batch_transcript_analysis_response, to_transcript_analysis_response
+from app.schemas import (
+    BatchTranscriptAnalysisRequest,
+    BatchTranscriptAnalysisResponse,
+    TranscriptAnalysisResponse,
+)
+from app.errors import AnalysisNotFoundError, InvalidTranscriptError, LLMCompletionError
+from app.frontend import build_gradio_app
+from app.ports import LLm
+from app.repositories import InMemoryTranscriptAnalysisRepository, TranscriptAnalysisRepository
+from app.services import TranscriptAnalysisService
+# FastAPI application object; title, version and description are the API metadata.
+app = FastAPI(
+    title="Transcript Analysis API",
+    version="0.1.0",
+    description="Analyze plain text transcripts and retrieve stored analysis results.",
+)
+# Single module-level repository instance, handed out by get_repository() below.
+_repository = InMemoryTranscriptAnalysisRepository()
+@lru_cache
+def get_env_configs() -> EnvConfigs:
+    return EnvConfigs()
+@lru_cache
+def get_llm() -> LLm:
+    env_configs = get_env_configs()
+    return OpenAIAdapter(env_configs.OPENAI_API_KEY, env_configs.OPENAI_MODEL)
+def get_repository() -> TranscriptAnalysisRepository:
+    """Return the module-level repository instance (the same object on every call)."""
+    return _repository
+def get_analysis_service(
+    llm: Annotated[LLm, Depends(get_llm)],
+    repository: Annotated[TranscriptAnalysisRepository, Depends(get_repository)],
+) -> TranscriptAnalysisService:
+    return TranscriptAnalysisService(llm, repository)
+def get_gradio_analysis_service() -> TranscriptAnalysisService:
+    return TranscriptAnalysisService(get_llm(), get_repository())
+@app.exception_handler(InvalidTranscriptError)
+async def invalid_transcript_handler(_: Request, exc: InvalidTranscriptError) -> JSONResponse:
+    return JSONResponse(status_code=400, content={"detail": str(exc)})
+@app.exception_handler(AnalysisNotFoundError)
+async def analysis_not_found_handler(_: Request, exc: AnalysisNotFoundError) -> JSONResponse:
+    return JSONResponse(status_code=404, content={"detail": str(exc)})
+@app.exception_handler(LLMCompletionError)
+async def llm_completion_handler(_: Request, exc: LLMCompletionError) -> JSONResponse:
+    return JSONResponse(status_code=502, content={"detail": str(exc)})
+@app.get("/", include_in_schema=False)
+def redirect_to_ui() -> RedirectResponse:
+    return RedirectResponse(url="/ui")
+@app.get("/analyses", response_model=TranscriptAnalysisResponse)
+def analyze_transcript(
+    transcript: Annotated[str, Query(description="Plain text transcript to analyze.")],
+    service: Annotated[TranscriptAnalysisService, Depends(get_analysis_service)],
+) -> TranscriptAnalysisResponse:
+    analysis = service.analyze(transcript)
+    return to_transcript_analysis_response(analysis)
+@app.get("/analyses/{analysis_id}", response_model=TranscriptAnalysisResponse)
+def get_transcript_analysis(
+    analysis_id: str,
+    service: Annotated[TranscriptAnalysisService, Depends(get_analysis_service)],
+) -> TranscriptAnalysisResponse:
+    analysis = service.get(analysis_id)
+    return to_transcript_analysis_response(analysis)
+@app.post("/analyses/batch", response_model=BatchTranscriptAnalysisResponse)
+async def analyze_transcripts_batch(
+    request: BatchTranscriptAnalysisRequest,
+    service: Annotated[TranscriptAnalysisService, Depends(get_analysis_service)],
+) -> BatchTranscriptAnalysisResponse:
+    analyses = await service.analyze_many(request.transcripts)
+    return to_batch_transcript_analysis_response(analyses)
+# Mount the Gradio UI under /ui and rebind `app` to the object returned by the
+# mount call. This runs after the routes above have been registered.
+app = gr.mount_gradio_app(app, build_gradio_app(get_gradio_analysis_service), path="/ui")

app/mappers.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from app.domain import TranscriptAnalysis
+from app.schemas import BatchTranscriptAnalysisResponse, TranscriptAnalysisResponse
+def to_transcript_analysis_response(analysis: TranscriptAnalysis) -> TranscriptAnalysisResponse:
+    return TranscriptAnalysisResponse(
+        id=analysis.id,
+        summary=analysis.summary,
+        action_items=list(analysis.action_items),
+    )
+def to_batch_transcript_analysis_response(
+    analyses: list[TranscriptAnalysis],
+) -> BatchTranscriptAnalysisResponse:
+    return BatchTranscriptAnalysisResponse(
+        items=[to_transcript_analysis_response(analysis) for analysis in analyses]
+    )

app/ports/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from app.ports.llm import LLm

app/ports/llm.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from pydantic import BaseModel
+from abc import ABC, abstractmethod
+class LLm(ABC):
+    @abstractmethod
+    def run_completion(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[BaseModel],
+    ) -> BaseModel:
+        pass
+    @abstractmethod
+    async def run_completion_async(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[BaseModel],
+    ) -> BaseModel:
+        pass

app/prompts.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# System message for the transcript-analysis completion. The leading spaces on
+# the continuation lines are inside the string literal and therefore part of
+# the text.
+SYSTEM_PROMPT = """You are an expert business coach skilled in analyzing conversation transcripts.
+                    Your job is to provide insightful, concise summaries and recommend clear, actionable next steps
+                    to help clients achieve their goals effectively."""
+# User message template; `{transcript}` is the only placeholder and is filled
+# with str.format(transcript=...).
+RAW_USER_PROMPT = """Given the transcript below, generate:
+                    1. A brief, insightful summary highlighting key points discussed.
+                    2. A clear, structured list of recommended next actions.
+                    Transcript:
+                    {transcript}"""

app/repositories.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from threading import RLock
+from typing import Protocol
+from app.domain import TranscriptAnalysis
+class TranscriptAnalysisRepository(Protocol):
+    def save(self, analysis: TranscriptAnalysis) -> None:
+        pass
+    def get(self, analysis_id: str) -> TranscriptAnalysis | None:
+        pass
+class InMemoryTranscriptAnalysisRepository:
+    def __init__(self) -> None:
+        self._analyses: dict[str, TranscriptAnalysis] = {}
+        self._lock = RLock()
+    def save(self, analysis: TranscriptAnalysis) -> None:
+        with self._lock:
+            self._analyses[analysis.id] = analysis
+    def get(self, analysis_id: str) -> TranscriptAnalysis | None:
+        with self._lock:
+            return self._analyses.get(analysis_id)

app/schemas.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from pydantic import BaseModel
+# Response body for a single transcript analysis.
+# (Comments are used instead of docstrings so the generated schemas stay unchanged.)
+class TranscriptAnalysisResponse(BaseModel):
+    # Identifier of the analysis.
+    id: str
+    # Summary text of the transcript.
+    summary: str
+    # Recommended next actions, one string per item.
+    action_items: list[str]
+# Request body for the batch endpoint: the transcripts to analyze.
+class BatchTranscriptAnalysisRequest(BaseModel):
+    transcripts: list[str]
+# Response body for the batch endpoint: one analysis response per item.
+class BatchTranscriptAnalysisResponse(BaseModel):
+    items: list[TranscriptAnalysisResponse]

app/services.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import asyncio
+import uuid
+from collections.abc import Sequence
+from pydantic import BaseModel, ValidationError
+from app import prompts
+from app.domain import TranscriptAnalysis
+from app.llm_schema import LLMTranscriptAnalysisResponse
+from app.errors import AnalysisNotFoundError, InvalidTranscriptError, LLMCompletionError
+from app.ports import LLm
+from app.repositories import TranscriptAnalysisRepository
+class TranscriptAnalysisService:
+    def __init__(self, llm: LLm, repository: TranscriptAnalysisRepository) -> None:
+        self._llm = llm
+        self._repository = repository
+    def analyze(self, transcript: str) -> TranscriptAnalysis:
+        clean_transcript = self._validate_transcript(transcript)
+        analysis = self._create_analysis(clean_transcript)
+        self._repository.save(analysis)
+        return analysis
+    async def analyze_many(self, transcripts: Sequence[str]) -> list[TranscriptAnalysis]:
+        if not transcripts:
+            raise InvalidTranscriptError("At least one transcript is required.")
+        clean_transcripts = [self._validate_transcript(transcript) for transcript in transcripts]
+        analyses = await asyncio.gather(
+            *(self._create_analysis_async(transcript) for transcript in clean_transcripts)
+        )
+        for analysis in analyses:
+            self._repository.save(analysis)
+        return list(analyses)
+    def get(self, analysis_id: str) -> TranscriptAnalysis:
+        analysis = self._repository.get(analysis_id)
+        if analysis is None:
+            raise AnalysisNotFoundError(f"Transcript analysis '{analysis_id}' was not found.")
+        return analysis
+    def _create_analysis(self, transcript: str) -> TranscriptAnalysis:
+        response = self._run_completion(transcript)
+        return self._build_analysis(response)
+    async def _create_analysis_async(self, transcript: str) -> TranscriptAnalysis:
+        response = await self._run_completion_async(transcript)
+        return self._build_analysis(response)
+    @staticmethod
+    def _build_analysis(response: LLMTranscriptAnalysisResponse) -> TranscriptAnalysis:
+        return TranscriptAnalysis(
+            id=str(uuid.uuid4()),
+            summary=response.summary,
+            action_items=tuple(response.action_items),
+        )
+    def _run_completion(self, transcript: str) -> LLMTranscriptAnalysisResponse:
+        user_prompt = prompts.RAW_USER_PROMPT.format(transcript=transcript)
+        try:
+            completion = self._llm.run_completion(
+                prompts.SYSTEM_PROMPT,
+                user_prompt,
+                LLMTranscriptAnalysisResponse,
+            )
+        except Exception as exc:
+            raise LLMCompletionError("Transcript analysis failed.") from exc
+        return self._parse_completion_response(completion)
+    async def _run_completion_async(self, transcript: str) -> LLMTranscriptAnalysisResponse:
+        user_prompt = prompts.RAW_USER_PROMPT.format(transcript=transcript)
+        try:
+            completion = await self._llm.run_completion_async(
+                prompts.SYSTEM_PROMPT,
+                user_prompt,
+                LLMTranscriptAnalysisResponse,
+            )
+        except Exception as exc:
+            raise LLMCompletionError("Transcript analysis failed.") from exc
+        return self._parse_completion_response(completion)
+    @staticmethod
+    def _parse_completion_response(completion: BaseModel | object) -> LLMTranscriptAnalysisResponse:
+        try:
+            if isinstance(completion, BaseModel):
+                return LLMTranscriptAnalysisResponse.model_validate(completion.model_dump())
+            return LLMTranscriptAnalysisResponse.model_validate(completion)
+        except ValidationError as exc:
+            raise LLMCompletionError("Transcript analysis returned an invalid response.") from exc
+    @staticmethod
+    def _validate_transcript(transcript: str) -> str:
+        clean_transcript = transcript.strip()
+        if not clean_transcript:
+            raise InvalidTranscriptError("Transcript cannot be empty.")
+        return clean_transcript

assessment.md ADDED Viewed

	@@ -0,0 +1,55 @@

+# Python Interview Task
+## Scenario
+You are asked to build a Python web API that analyzes plain text transcripts and returns a summary along with a list of next actions. Your implementation should clearly demonstrate good architectural practices.
+## Provided Adapter (do not implement)
+- **OpenAI Adapter**: Sends the transcript text to OpenAI's API along with a predefined, hardcoded prompt. This adapter returns a DTO.
+- The transcript, system prompt, and user prompt are provided.
+A file defining the interface (ports) for this adapter will be provided.
+## Requirements (Point 1)
+### Analyze Transcript
+- Create an HTTP endpoint (e.g., using FastAPI or Flask) that accepts GET requests containing a plain text transcript.
+- Perform basic input validation (for example, reject an empty transcript).
+- Invoke the provided OpenAI adapter to analyze the transcript.
+- Store the analysis result in memory (an external DB is not required).
+- Return a response containing:
+  - A unique ID.
+  - A summary of the transcript.
+  - A suggested list of next steps or actions based on the transcript analysis.
+### Get a Transcript by ID
+- Create an HTTP endpoint to get transcript analysis by ID.
+### Additional Requirements
+- Adhere strictly to the interfaces defined in the provided ports file.
+## Optional Advanced Requirements (Point 2)
+- Build an additional endpoint to support concurrent analysis of multiple transcripts within a single request:
+  - Implement asynchronous processing (e.g., using asyncio).
+  - Handle multiple transcript analyses simultaneously without blocking the main API thread.
+## Success Criteria
+- Code readability, modularity, and adherence to best practices.
+- Functional correctness of the API.
+- Swagger (OpenAPI) documentation for the API.
+- Clear error handling and appropriate HTTP response statuses.
+- Testability of the code (clear separation of concerns, ease of unit testing).
+- (Optional) Effective asynchronous processing implementation.
+## Hints
+- You will find a test that runs the OpenAI adapter. Use it as the documentation for building the prompt that analyzes the transcript.
+- The provided OpenAI adapter utilizes structured output, allowing you to specify a system prompt, a user prompt, and a DTO. The adapter then returns a model instance populated according to the DTO's defined structure.
+- Create a DTO that contains the requested fields.
+- Hexagonal (or clean) architecture consists of distinct layers. Consider creating a separate model layer for the LLM responses. Pay attention to avoiding layer coupling.

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,20 @@

+[tool.poetry]
+name = "ml-tech-assessment"
+version = "0.1.0"
+description = ""
+authors = ["jhonvalderrama <jhonvalderrama@aceup.com>"]
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.12"
+fastapi = "^0.115.12"
+gradio = "^6.14.0"
+openai = "^1.76.2"
+pydantic-settings = "^2.9.1"
+pytest = "^8.3.5"
+uvicorn = "^0.34.2"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

tests/__init__.py ADDED Viewed

File without changes

tests/adapters/__init__.py ADDED Viewed

File without changes

tests/adapters/mock_data.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# System message used by the adapter test. The leading spaces on the
+# continuation lines are inside the string literal and part of the text.
+SYSTEM_PROMPT = """You are an expert business coach skilled in analyzing conversation transcripts.
+                    Your job is to provide insightful, concise summaries and recommend clear, actionable next steps
+                    to help clients achieve their goals effectively."""
+# User message template; `{transcript}` is the only placeholder.
+RAW_USER_PROMPT = """Given the transcript below, generate:
+                    1. A brief, insightful summary highlighting key points discussed.
+                    2. A clear, structured list of recommended next actions.
+                    Transcript:
+                    {transcript}"""
+TRANSCRIPT = """Mark Foster | MCC, ACTC: Hey there, Liam. Glad we could find a few minutes for this one-on-one. How are things going?
+Liam Garcia: Hey, Mark. Doing well, thanks. It’s been a busy week—my local dev environment is a bit cluttered from a new feature branch, but I’m making progress. Ready to dig in on Python best practices?
+Mark Foster | MCC, ACTC: Absolutely. I know you wanted to focus on a handful of coding guidelines and how they tie into your team’s speed. Let’s start big picture: what’s motivating you to tighten up your Python practices right now?
+Liam Garcia: Mainly two reasons. First, the codebase is growing, and I want to make sure we’re consistent in how we name things, structure modules, and write docstrings. Second, I’m onboarding new developers, and I’ve noticed they can get lost if we don’t have explicit standards in place.
+Mark Foster | MCC, ACTC: Makes sense. So, if we look at code readability—PEP 8, docstrings, that sort of thing—what’s your first priority?
+Liam Garcia: Definitely PEP 8. That’s sort of non-negotiable. I’d like us to adopt a tool like Black to auto-format. That alone can reduce the back-and-forth on code reviews. It’s a small step but a huge time-saver.
+Mark Foster | MCC, ACTC: I love it. Automating style enforcement frees you up to focus on more important stuff—like logic, architecture, and performance. Any concerns about pushback from your devs?
+Liam Garcia: A bit. Some folks are used to their own formatting quirks. But I keep reminding them it’s not about personal style—it’s about consistent style that benefits everyone. I think once they see the time saved, they’ll be on board.
+Mark Foster | MCC, ACTC: Good call. How about docstrings? I know some devs skip them unless forced.
+Liam Garcia: Right. I’m pushing for Google-style docstrings. For classes, methods, and modules, they clarify purpose and expected inputs/outputs. It’s a bit of extra effort at first, but it pays off when you come back months later or when a new dev jumps in.
+Mark Foster | MCC, ACTC: So your plan is PEP 8 plus auto-formatting, then Google-style docstrings. Anything else on your radar?
+Liam Garcia: Yes—test coverage. We’re aiming for 80% coverage in the short term. That ensures we catch regressions early. I’m also encouraging test-driven development for bigger features. It’s not mandatory, but I want the team comfortable with writing tests before the code whenever possible.
+Mark Foster | MCC, ACTC: Great. You mentioned wanting to go faster as a team. How do you see these coding best practices speeding things up, rather than slowing them down?
+Liam Garcia: Well, the time you invest in writing docstrings or running auto-format tools is minimal compared to the hassle of deciphering unstructured code. It’s like a Formula One pit stop—everyone knows their role, follows the same procedure, and the car is back on track fast. Consistency and clarity remove friction.
+Mark Foster | MCC, ACTC: That’s an excellent analogy. So what’s your biggest concern about implementing all this?
+Liam Garcia: Probably that initial pushback, or the fear that it’s “too much process.” But I think if I keep reminding folks it’s about removing headaches—like merges, weird naming conflicts, missing tests—they’ll adopt it.
+Mark Foster | MCC, ACTC: It often helps to show quick wins. For instance, once your team sees how auto-formatting catches stray imports or how docstrings make a confusing function crystal clear, they’ll realize it’s worth it.
+Liam Garcia: Exactly. I’ll start small, maybe run a pilot on one module, let them see the difference, and then expand.
+Mark Foster | MCC, ACTC: That’s a solid plan, Liam. So to recap, you’re committing to:
+PEP 8 compliance via Black (or a similar auto-formatting tool).
+Google-style docstrings for all modules, classes, and major functions.
+A drive toward 80% test coverage, with TDD on key features.
+Anything else?
+Liam Garcia: That’s the core. I might also do a weekly quick code review session—just me and one other developer—so we can keep each other honest on these standards.
+Mark Foster | MCC, ACTC: That sounds like a perfect next step. How are you feeling as we wrap up?
+Liam Garcia: Confident. I know it’ll take some nudging, but once everyone sees the impact, I think we’ll be coding cleaner, shipping faster.
+Mark Foster | MCC, ACTC: Couldn’t have said it better. Thanks for the update, Liam. I look forward to hearing how it goes once you put these into practice.
+Liam Garcia: Thanks, Mark. I appreciate the guidance and encouragement. We’ll talk again soon—hopefully with good news on the coverage front!
+Mark Foster | MCC, ACTC: Sounds like a plan. Take care, Liam.
+"""

tests/adapters/test_openai.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+import pydantic
+import pytest
+from app import configurations
+from app.adapters import openai
+from tests.adapters import mock_data
+# Structured-output schema passed as the `dto` argument to the adapter in the test below.
+# Documented with `#` comments rather than a docstring: pydantic copies a model's
+# docstring into its generated JSON schema description.
+class Response(pydantic.BaseModel):
+    summary: str  # key asserted present in the serialized response
+    action_items: list[str]  # key asserted present in the serialized response
+def test_openai_adapter() -> None:
+    if os.getenv("RUN_OPENAI_INTEGRATION_TESTS") != "1":
+        pytest.skip("Set RUN_OPENAI_INTEGRATION_TESTS=1 to run the live OpenAI adapter test.")
+    # Configuration
+    env_variables = configurations.EnvConfigs()
+    system_prompt = mock_data.SYSTEM_PROMPT
+    raw_user_prompt = mock_data.RAW_USER_PROMPT
+    transcript = mock_data.TRANSCRIPT
+    user_prompt = raw_user_prompt.format(
+        transcript=transcript)
+    openai_adapter = openai.OpenAIAdapter(env_variables.OPENAI_API_KEY, env_variables.OPENAI_MODEL)
+    # action
+    response = openai_adapter.run_completion(system_prompt, user_prompt, Response)
+    serialized_response = response.model_dump()
+    # assert
+    print(serialized_response)
+    assert "summary" in serialized_response.keys()
+    assert "action_items" in serialized_response.keys()

tests/api/test_transcript_analysis_api.py ADDED Viewed

	@@ -0,0 +1,110 @@

+from collections.abc import Iterator
+import pytest
+from fastapi.testclient import TestClient
+from pydantic import BaseModel
+from app.main import app, get_analysis_service
+from app.ports import LLm
+from app.repositories import InMemoryTranscriptAnalysisRepository
+from app.services import TranscriptAnalysisService
+class FakeLLM(LLm):
+    def run_completion(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[BaseModel],
+    ) -> BaseModel:
+        return dto(summary="API summary.", action_items=["Confirm owner", "Set deadline"])
+    async def run_completion_async(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[BaseModel],
+    ) -> BaseModel:
+        return dto(summary="API summary.", action_items=["Confirm owner", "Set deadline"])
+@pytest.fixture
+def client() -> TestClient:
+    service = TranscriptAnalysisService(FakeLLM(), InMemoryTranscriptAnalysisRepository())
+    app.dependency_overrides[get_analysis_service] = lambda: service
+    with TestClient(app) as test_client:
+        yield test_client
+    app.dependency_overrides.clear()
+def test_analyze_transcript_success(client: TestClient) -> None:
+    response = client.get("/analyses", params={"transcript": "Discuss rollout plan."})
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["id"]
+    assert payload["summary"] == "API summary."
+    assert payload["action_items"] == ["Confirm owner", "Set deadline"]
+def test_analyze_transcript_rejects_empty_query(client: TestClient) -> None:
+    response = client.get("/analyses", params={"transcript": "   "})
+    assert response.status_code == 400
+    assert response.json()["detail"] == "Transcript cannot be empty."
+def test_get_transcript_analysis_success(client: TestClient) -> None:
+    created = client.get("/analyses", params={"transcript": "Discuss rollout plan."}).json()
+    response = client.get(f"/analyses/{created['id']}")
+    assert response.status_code == 200
+    assert response.json() == created
+def test_get_transcript_analysis_returns_404(client: TestClient) -> None:
+    response = client.get("/analyses/missing-id")
+    assert response.status_code == 404
+    assert response.json()["detail"] == "Transcript analysis 'missing-id' was not found."
+def test_analyze_batch_success(client: TestClient) -> None:
+    response = client.post(
+        "/analyses/batch",
+        json={"transcripts": ["Discuss roadmap.", "Review hiring plan."]},
+    )
+    assert response.status_code == 200
+    payload = response.json()
+    assert len(payload["items"]) == 2
+    assert all(item["summary"] == "API summary." for item in payload["items"])
+def test_analyze_batch_rejects_empty_list(client: TestClient) -> None:
+    response = client.post("/analyses/batch", json={"transcripts": []})
+    assert response.status_code == 400
+    assert response.json()["detail"] == "At least one transcript is required."
+def test_analyze_batch_rejects_empty_transcript(client: TestClient) -> None:
+    response = client.post("/analyses/batch", json={"transcripts": ["Discuss roadmap.", "  "]})
+    assert response.status_code == 400
+    assert response.json()["detail"] == "Transcript cannot be empty."
+def test_gradio_ui_is_mounted(client: TestClient) -> None:
+    response = client.get("/ui/", follow_redirects=True)
+    assert response.status_code == 200
+    assert "text/html" in response.headers["content-type"]
+def test_root_redirects_to_gradio_ui(client: TestClient) -> None:
+    response = client.get("/", follow_redirects=False)
+    assert response.status_code == 307
+    assert response.headers["location"] == "/ui"

tests/frontend/test_gradio_frontend.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import pytest
+from app.domain import TranscriptAnalysis
+from app.errors import AnalysisNotFoundError, InvalidTranscriptError
+from app.frontend import analyze_transcript, format_action_items, lookup_analysis
+class FakeService:
+    def __init__(self) -> None:
+        self.analysis = TranscriptAnalysis(
+            id="analysis-1",
+            summary="The team aligned on next steps.",
+            action_items=("Confirm owner", "Set deadline"),
+        )
+    def analyze(self, transcript: str) -> TranscriptAnalysis:
+        if not transcript.strip():
+            raise InvalidTranscriptError("Transcript cannot be empty.")
+        return self.analysis
+    def get(self, analysis_id: str) -> TranscriptAnalysis:
+        if analysis_id != self.analysis.id:
+            raise AnalysisNotFoundError(f"Transcript analysis '{analysis_id}' was not found.")
+        return self.analysis
+def test_analyze_transcript_returns_formatted_result() -> None:
+    result = analyze_transcript("Discuss roadmap.", FakeService)
+    assert result == (
+        "analysis-1",
+        "The team aligned on next steps.",
+        "1. Confirm owner\n2. Set deadline",
+    )
+def test_lookup_analysis_returns_formatted_result() -> None:
+    result = lookup_analysis(" analysis-1 ", FakeService)
+    assert result == (
+        "analysis-1",
+        "The team aligned on next steps.",
+        "1. Confirm owner\n2. Set deadline",
+    )
+def test_analyze_transcript_maps_empty_input_to_gradio_error() -> None:
+    with pytest.raises(gr.Error, match="Transcript cannot be empty."):
+        analyze_transcript("   ", FakeService)
+def test_lookup_analysis_maps_missing_id_to_gradio_error() -> None:
+    with pytest.raises(gr.Error, match="Transcript analysis 'missing-id' was not found."):
+        lookup_analysis("missing-id", FakeService)
+def test_format_action_items_handles_empty_list() -> None:
+    assert format_action_items(()) == "No suggested next steps returned."

tests/services/test_transcript_analysis_service.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import asyncio
+import pydantic
+import pytest
+from app.errors import AnalysisNotFoundError, InvalidTranscriptError, LLMCompletionError
+from app.ports import LLm
+from app.repositories import InMemoryTranscriptAnalysisRepository
+from app.services import TranscriptAnalysisService
+class FakeLLM(LLm):
+    def __init__(self) -> None:
+        self.user_prompts: list[str] = []
+        self.async_user_prompts: list[str] = []
+    def run_completion(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[pydantic.BaseModel],
+    ) -> pydantic.BaseModel:
+        self.user_prompts.append(user_prompt)
+        return dto(summary="A concise summary.", action_items=["Follow up", "Share notes"])
+    async def run_completion_async(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[pydantic.BaseModel],
+    ) -> pydantic.BaseModel:
+        self.async_user_prompts.append(user_prompt)
+        return dto(summary="A concise summary.", action_items=["Follow up", "Share notes"])
+class FailingLLM(LLm):
+    def run_completion(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[pydantic.BaseModel],
+    ) -> pydantic.BaseModel:
+        raise RuntimeError("provider unavailable")
+    async def run_completion_async(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        dto: type[pydantic.BaseModel],
+    ) -> pydantic.BaseModel:
+        raise RuntimeError("provider unavailable")
+def build_service(llm: LLm | None = None) -> TranscriptAnalysisService:
+    return TranscriptAnalysisService(llm or FakeLLM(), InMemoryTranscriptAnalysisRepository())
+def test_analyze_returns_and_persists_result() -> None:
+    llm = FakeLLM()
+    service = build_service(llm)
+    analysis = service.analyze("  Discuss launch plan.  ")
+    assert analysis.id
+    assert analysis.summary == "A concise summary."
+    assert analysis.action_items == ("Follow up", "Share notes")
+    assert service.get(analysis.id) == analysis
+    assert "Discuss launch plan." in llm.user_prompts[0]
+def test_analyze_rejects_empty_transcript() -> None:
+    service = build_service()
+    with pytest.raises(InvalidTranscriptError):
+        service.analyze("   ")
+def test_get_raises_when_analysis_is_missing() -> None:
+    service = build_service()
+    with pytest.raises(AnalysisNotFoundError):
+        service.get("missing-id")
+def test_analyze_many_processes_and_persists_all_results() -> None:
+    llm = FakeLLM()
+    service = build_service(llm)
+    analyses = asyncio.run(service.analyze_many(["First transcript", "Second transcript"]))
+    assert len(analyses) == 2
+    assert len({analysis.id for analysis in analyses}) == 2
+    assert [service.get(analysis.id) for analysis in analyses] == analyses
+    assert len(llm.async_user_prompts) == 2
+    assert llm.user_prompts == []
+def test_analyze_many_rejects_empty_list() -> None:
+    service = build_service()
+    with pytest.raises(InvalidTranscriptError):
+        asyncio.run(service.analyze_many([]))
+def test_llm_errors_are_wrapped() -> None:
+    service = build_service(FailingLLM())
+    with pytest.raises(LLMCompletionError):
+        service.analyze("Discuss launch plan.")
+def test_analyze_many_wraps_llm_errors() -> None:
+    service = build_service(FailingLLM())
+    with pytest.raises(LLMCompletionError):
+        asyncio.run(service.analyze_many(["Discuss launch plan."]))