Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- README.md +46 -0
- app.py +113 -0
- requirements.txt +3 -0
README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: dir2md + Spicy - Repository to Markdown Converter
|
| 3 |
+
emoji: 📂
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.45.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: Markdown + spicy risk blueprints for GitHub repos
|
| 12 |
+
tags:
|
| 13 |
+
- developer-tools
|
| 14 |
+
- markdown
|
| 15 |
+
- repository-analysis
|
| 16 |
+
- llm
|
| 17 |
+
- code-analysis
|
| 18 |
+
- python
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
# dir2md + Spicy (Hugging Face Demo)
|
| 22 |
+
|
| 23 |
+
Convert any public GitHub repository into an LLM-ready markdown blueprint plus an optional spicy (5-level) risk report.
|
| 24 |
+
|
| 25 |
+
## What the demo does
|
| 26 |
+
- Analyze repo structure and key files.
|
| 27 |
+
- Generate tree + sampled content with token-aware budgets.
|
| 28 |
+
- Output human markdown and JSONL (for LLMs), with optional spicy findings.
|
| 29 |
+
|
| 30 |
+
## Quick start
|
| 31 |
+
1) Paste a GitHub URL.
|
| 32 |
+
2) Choose options: preset (pro/raw), spicy risk report, include file contents, max repo size, shallow clone.
|
| 33 |
+
3) Run and copy the markdown/JSONL outputs (each output box has a copy button).
|
| 34 |
+
|
| 35 |
+
## Fresh highlights (1.1.0)
|
| 36 |
+
- `--fast` preset (tree + manifest only, no file reads).
|
| 37 |
+
- Default dual outputs (md + jsonl) for human + LLM.
|
| 38 |
+
- Spicy risk report (`--spicy`, `--spicy-strict`) with 5 severity levels.
|
| 39 |
+
- Modular pipeline (`walker`, `selector`, `renderer`, `orchestrator`) for cleaner extensibility.
|
| 40 |
+
|
| 41 |
+
## Notes
|
| 42 |
+
- Current Gradio SDK: **5.45.0**. A newer **6.0.2** is available; update `sdk_version` and `gradio` pin if you want to try it.
|
| 43 |
+
- App entrypoint: `app.py` (the `app_file` set in the front matter); deps in `requirements.txt`.
|
| 44 |
+
|
| 45 |
+
Made with care by Flamehaven for developers who want their AI to understand their code.
|
| 46 |
+
|
app.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import git
|
| 3 |
+
import tempfile
|
| 4 |
+
import shutil
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
from dir2md.core import generate_markdown_report, Config
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _validate_github_url(repo_url):
    """Return *repo_url* (whitespace-stripped) if it is an https GitHub repo URL.

    Raises:
        ValueError: if the value is empty, does not use the ``https`` scheme,
            does not point at github.com, or lacks an ``<owner>/<repo>`` path.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    url = (repo_url or "").strip()
    parts = urlsplit(url)
    segments = [segment for segment in parts.path.split("/") if segment]
    host = (parts.hostname or "").lower()
    if (
        parts.scheme != "https"
        or host not in {"github.com", "www.github.com"}
        or len(segments) < 2
    ):
        raise ValueError(
            "Please enter a public repository URL like https://github.com/<owner>/<repo>"
        )
    return url


def process_github_repo(
    repo_url: str,
    preset: str,
    spicy: bool,
    include_contents: bool,
    max_size_mb: int,
    shallow: bool,
):
    """
    Clone a public GitHub repository and generate dir2md outputs (human MD + AI JSONL).

    Args:
        repo_url: ``https://github.com/<owner>/<repo>`` URL typed by the user.
            Anything else (other hosts, ``file://``, ``ext::``, option-like
            strings) is rejected before git is invoked.
        preset: dir2md preset name, passed through to ``Config``.
        spicy: whether to enable the spicy risk report in ``Config``.
        include_contents: whether file contents are included in the report.
        max_size_mb: upper bound, in MiB, for the cloned working directory.
        shallow: when true, clone with ``depth=1`` and a single branch.

    Returns:
        A ``(markdown, jsonl)`` pair holding whatever ``generate_markdown_report``
        returns for each output format. On any failure the pair is
        ``("An error occurred: <message>", "")`` so the UI shows the error
        in the first output box instead of crashing.
    """
    temp_dir_path = None
    try:
        # The URL comes from an untrusted web form: validate before touching git.
        repo_url = _validate_github_url(repo_url)

        temp_dir_path = tempfile.mkdtemp()
        temp_dir = Path(temp_dir_path)

        gr.Info(f"Cloning repository: {repo_url} ...")
        clone_args = {"to_path": temp_dir}
        if shallow:
            clone_args["depth"] = 1
            clone_args["single_branch"] = True
        git.Repo.clone_from(repo_url, **clone_args)
        gr.Info("Repository cloned. Generating blueprint...")

        # Size guard. Symlinks are skipped so that a link pointing outside the
        # clone is neither followed nor counted.
        total_bytes = sum(
            f.lstat().st_size
            for f in temp_dir.rglob("*")
            if not f.is_symlink() and f.is_file()
        )
        total_mb = total_bytes / (1024 * 1024)
        if total_mb > max_size_mb:
            raise ValueError(f"Repository too large: ~{total_mb:.1f} MB (limit {max_size_mb} MB)")

        # NOTE(review): the output paths live inside the directory being
        # scanned. If generate_markdown_report writes to cfg.output, the first
        # report could show up in the second scan -- confirm against dir2md.
        output_path = temp_dir / "blueprint.md"

        cfg = Config(
            root=temp_dir,
            output=output_path,
            preset=preset,
            include_globs=[],
            exclude_globs=[],
            omit_globs=[],
            respect_gitignore=True,
            follow_symlinks=False,
            max_bytes=None,
            max_lines=None,
            include_contents=include_contents,
            llm_mode="summary",  # more readable by default
            budget_tokens=8000,
            max_file_tokens=2000,
            dedup_bits=16,
            sample_head=120,
            sample_tail=40,
            strip_comments=False,
            emit_manifest=False,  # demo keeps outputs in-memory
            explain_capsule=False,
            no_timestamp=True,
            masking_mode="basic",
            spicy=spicy,
        )

        md_output = generate_markdown_report(cfg)

        # AI-friendly JSONL output: reuse cfg with jsonl
        cfg.output_format = "jsonl"
        cfg.output = temp_dir / "blueprint.jsonl"
        jsonl_output = generate_markdown_report(cfg)

        gr.Info("Blueprint generated successfully!")

        return md_output, jsonl_output

    except Exception as e:
        # Top-level UI boundary: report the failure as text. Cleanup is left
        # to the finally clause, which runs on this path as well.
        return f"An error occurred: {e}", ""
    finally:
        if temp_dir_path:
            # Best-effort removal: a failed cleanup must not replace the
            # result (or the error message) with a new exception.
            shutil.rmtree(temp_dir_path, ignore_errors=True)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# Gradio UI: six inputs mapped positionally onto process_github_repo's
# parameters, and two text outputs (markdown report, JSONL report).
demo = gr.Interface(
    fn=process_github_repo,
    inputs=[
        gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/Flamehaven/dir2md"),
        gr.Radio(choices=["pro", "raw"], value="pro", label="Select Preset"),
        gr.Checkbox(label="Spicy risk report", value=True),
        gr.Checkbox(label="Include file contents", value=True),
        gr.Slider(label="Max repo size (MB)", minimum=10, maximum=500, value=200, step=10),
        gr.Checkbox(label="Shallow clone (depth=1)", value=True),
    ],
    outputs=[
        gr.Textbox(label="Markdown Blueprint (human)", lines=30, show_copy_button=True),
        gr.Textbox(label="JSONL (AI-ready)", lines=15, show_copy_button=True),
    ],
    title="dir2md: AI-Ready Repository Blueprint Generator",
    description="Enter a public GitHub repository URL to convert its structure/content into a Markdown blueprint (human) and JSONL (AI).",
    # `flagging_mode` is the Gradio 5 name for the deprecated `allow_flagging`.
    flagging_mode="never",
    # Each example clones a real repository over the network, so they must
    # only run when a user clicks one, never be pre-computed at startup.
    cache_examples=False,
    examples=[
        ["https://github.com/psf/requests", "pro", True, True, 200, True],
        ["https://github.com/gradio-app/gradio", "raw", True, True, 200, True],
    ],
)

demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==5.45.0
|
| 2 |
+
pathspec>=0.12.0
|
| 3 |
+
GitPython
|