Flamehaven committed on
Commit
60f743e
·
verified ·
1 Parent(s): 5df9f93

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +46 -0
  2. app.py +113 -0
  3. requirements.txt +3 -0
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: dir2md + Spicy - Repository to Markdown Converter
3
+ emoji: 📂
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.45.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Markdown + spicy risk blueprints for GitHub repos
12
+ tags:
13
+ - developer-tools
14
+ - markdown
15
+ - repository-analysis
16
+ - llm
17
+ - code-analysis
18
+ - python
19
+ ---
20
+
21
+ # dir2md + Spicy (Hugging Face Demo)
22
+
23
+ Convert any public GitHub repository into an LLM-ready Markdown blueprint, plus an optional spicy (5-level) risk report.
24
+
25
+ ## What the demo does
26
+ - Analyze repo structure and key files.
27
+ - Generate tree + sampled content with token-aware budgets.
28
+ - Output human markdown and JSONL (for LLMs), with optional spicy findings.
29
+
30
+ ## Quick start
31
+ 1) Paste a GitHub URL.
32
+ 2) Choose options: preset (pro/raw), spicy risk report, include file contents, max repo size, shallow clone.
33
+ 3) Run, then copy the Markdown/JSONL outputs with the copy buttons.
34
+
35
+ ## Fresh highlights (1.1.0)
36
+ - `--fast` preset (tree + manifest only, no file reads).
37
+ - Default dual outputs (md + jsonl) for human + LLM.
38
+ - Spicy risk report (`--spicy`, `--spicy-strict`) with 5 severity levels.
39
+ - Modular pipeline (`walker`, `selector`, `renderer`, `orchestrator`) for cleaner extensibility.
40
+
41
+ ## Notes
42
+ - Current Gradio SDK: **5.45.0**. A newer **6.0.2** is available; update `sdk_version` and `gradio` pin if you want to try it.
43
+ - App entrypoint: `app.py`; deps in `requirements.txt`.
44
+
45
+ Made with care by Flamehaven for developers who want their AI to understand their code.
46
+
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import git
3
+ import tempfile
4
+ import shutil
5
+ from pathlib import Path
6
+ import os
7
+ import json
8
+
9
+ from dir2md.core import generate_markdown_report, Config
10
+
11
+
12
# Only plain HTTPS GitHub URLs are accepted. The URL comes straight from a
# public web form, so local paths, file:// URLs and exotic git transports
# (e.g. ext::) must never reach `git clone`.
_GITHUB_URL_PREFIX = "https://github.com/"


def _validate_github_url(repo_url: str) -> str:
    """Return *repo_url* stripped of surrounding whitespace, or raise ValueError.

    The URL must look like ``https://github.com/<owner>/<repo>`` and must not
    contain embedded whitespace.
    """
    url = (repo_url or "").strip()
    looks_valid = url.startswith(_GITHUB_URL_PREFIX) and not any(
        ch.isspace() for ch in url
    )
    if looks_valid:
        # Require at least "<owner>/<repo>" after the host.
        segments = [s for s in url[len(_GITHUB_URL_PREFIX):].split("/") if s]
        looks_valid = len(segments) >= 2
    if not looks_valid:
        raise ValueError(
            "Please enter a public repository URL of the form "
            "https://github.com/<owner>/<repo>"
        )
    return url


def process_github_repo(
    repo_url: str,
    preset: str,
    spicy: bool,
    include_contents: bool,
    max_size_mb: int,
    shallow: bool,
) -> tuple[str, str]:
    """
    Clone a public GitHub repository and generate dir2md outputs (human MD + AI JSONL).

    Args:
        repo_url: HTTPS URL of the repository (``https://github.com/<owner>/<repo>``).
        preset: dir2md preset name, forwarded to ``Config(preset=...)``.
        spicy: Forwarded to ``Config(spicy=...)``.
        include_contents: Forwarded to ``Config(include_contents=...)``.
        max_size_mb: Upper bound, in MiB, on the total size of the files found
            under the clone directory; larger clones are rejected.
        shallow: When true, clone with ``depth=1`` and ``single_branch=True``.

    Returns:
        ``(markdown_output, jsonl_output)`` on success. On any failure the
        first element is ``"An error occurred: <message>"`` and the second is
        an empty string; exceptions are not propagated to the caller.
    """
    temp_dir_path = None
    try:
        # Reject anything that is not a GitHub HTTPS URL before doing any work.
        repo_url = _validate_github_url(repo_url)

        temp_dir_path = tempfile.mkdtemp()
        temp_dir = Path(temp_dir_path)

        gr.Info(f"Cloning repository: {repo_url} ...")
        clone_args = {"to_path": temp_dir}
        if shallow:
            clone_args["depth"] = 1
            clone_args["single_branch"] = True
        git.Repo.clone_from(repo_url, **clone_args)
        gr.Info("Repository cloned. Generating blueprint...")

        # size guard
        # NOTE: this runs after the clone has finished and walks every file
        # under the clone directory, so it bounds processing, not download.
        total_bytes = sum(f.stat().st_size for f in temp_dir.rglob("*") if f.is_file())
        total_mb = total_bytes / (1024 * 1024)
        if total_mb > max_size_mb:
            raise ValueError(f"Repository too large: ~{total_mb:.1f} MB (limit {max_size_mb} MB)")

        output_path = temp_dir / "blueprint.md"

        cfg = Config(
            root=temp_dir,
            output=output_path,
            preset=preset,
            include_globs=[],
            exclude_globs=[],
            omit_globs=[],
            respect_gitignore=True,
            follow_symlinks=False,
            max_bytes=None,
            max_lines=None,
            include_contents=include_contents,
            llm_mode="summary",  # more readable by default
            budget_tokens=8000,
            max_file_tokens=2000,
            dedup_bits=16,
            sample_head=120,
            sample_tail=40,
            strip_comments=False,
            emit_manifest=False,  # demo keeps outputs in-memory
            explain_capsule=False,
            no_timestamp=True,
            masking_mode="basic",
            spicy=spicy,
        )

        md_output = generate_markdown_report(cfg)

        # AI-friendly JSONL output: reuse cfg with jsonl
        cfg.output_format = "jsonl"
        cfg.output = temp_dir / "blueprint.jsonl"
        jsonl_output = generate_markdown_report(cfg)

        gr.Info("Blueprint generated successfully!")

        return md_output, jsonl_output

    except Exception as e:
        # Top-level UI boundary: surface the failure in the Markdown output box.
        return f"An error occurred: {e}", ""
    finally:
        # Single, best-effort cleanup for both success and failure paths; a
        # cleanup failure must not replace the result being returned.
        if temp_dir_path is not None:
            shutil.rmtree(temp_dir_path, ignore_errors=True)
88
+
89
+
90
# Gradio UI definition. The order of `inputs` matches the positional
# parameters of process_github_repo (repo_url, preset, spicy,
# include_contents, max_size_mb, shallow), and each row in `examples`
# follows that same order.
demo = gr.Interface(
    fn=process_github_repo,
    inputs=[
        gr.Textbox(label="GitHub Repository URL", placeholder="https://github.com/Flamehaven/dir2md"),
        gr.Radio(choices=["pro", "raw"], value="pro", label="Select Preset"),
        gr.Checkbox(label="Spicy risk report", value=True),
        gr.Checkbox(label="Include file contents", value=True),
        gr.Slider(label="Max repo size (MB)", minimum=10, maximum=500, value=200, step=10),
        gr.Checkbox(label="Shallow clone (depth=1)", value=True),
    ],
    outputs=[
        gr.Textbox(label="Markdown Blueprint (human)", lines=30, show_copy_button=True),
        gr.Textbox(label="JSONL (AI-ready)", lines=15, show_copy_button=True),
    ],
    title="dir2md: AI-Ready Repository Blueprint Generator",
    description="Enter a public GitHub repository URL to convert its structure/content into a Markdown blueprint (human) and JSONL (AI).",
    # `allow_flagging` is the deprecated spelling in Gradio 5; `flagging_mode`
    # is the current name for the same setting.
    flagging_mode="never",
    examples=[
        ["https://github.com/psf/requests", "pro", True, True, 200, True],
        ["https://github.com/gradio-app/gradio", "raw", True, True, 200, True],
    ],
)

# Start the server only when run as a script, so importing this module
# (e.g. from tooling or tests) does not launch the app.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.45.0
2
+ pathspec>=0.12.0
3
+ GitPython