From ffa638429e4ac249493cb3878d83ecf2f6d6fdce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Musil?= Date: Tue, 12 May 2026 08:47:06 +0200 Subject: [PATCH] Initial project setup: Bun + cheerio scraper with dev mode - config/ with default + local (gitignored) merge pattern - fetcher with manual redirect tracking, cookie jar, dev mode JSON logging - cheerio parser stub ready for selectors - telegram sender - weekday-aware entry point Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 40 +++++++++++++++++ CLAUDE.md | 106 +++++++++++++++++++++++++++++++++++++++++++ README.md | 15 +++++++ bun.lock | 78 ++++++++++++++++++++++++++++++++ config/default.ts | 35 +++++++++++++++ config/index.ts | 20 +++++++++ package.json | 20 +++++++++ src/fetcher.ts | 112 ++++++++++++++++++++++++++++++++++++++++++++++ src/index.ts | 54 ++++++++++++++++++++++ src/parser.ts | 19 ++++++++ src/telegram.ts | 12 +++++ tsconfig.json | 30 +++++++++++++ 12 files changed, 541 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 bun.lock create mode 100644 config/default.ts create mode 100644 config/index.ts create mode 100644 package.json create mode 100644 src/fetcher.ts create mode 100644 src/index.ts create mode 100644 src/parser.ts create mode 100644 src/telegram.ts create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9129f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store + +# Local config overrides +config/local.ts + +# Dev mode request/response logs 
+debug/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..764c1dd --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,106 @@ + +Default to using Bun instead of Node.js. + +- Use `bun ` instead of `node ` or `ts-node ` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build ` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run + + +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; +import { createRoot } from "react-dom/client"; + +// import .css files directly and it works +import './index.css'; + +const root = createRoot(document.body); + +export default function Frontend() { + return

Hello, world!

; +} + +root.render(); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.mdx`. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e5eb0ad --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# ogaracheck + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run src/index.ts +``` + +This project was created using `bun init` in bun v1.3.13. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..cbce80f --- /dev/null +++ b/bun.lock @@ -0,0 +1,78 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "ogaracheck", + "dependencies": { + "cheerio": "^1.2.0", + }, + "devDependencies": { + "@types/bun": "latest", + "@types/cheerio": "^1.0.0", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@types/bun": ["@types/bun@1.3.13", "", { "dependencies": { "bun-types": "1.3.13" } }, "sha512-9fqXWk5YIHGGnUau9TEi+qdlTYDAnOj+xLCmSTwXfAIqXr2x4tytJb43E9uCvt09zJURKXwAtkoH4nLQfzeTXw=="], + + "@types/cheerio": ["@types/cheerio@1.0.0", "", { "dependencies": { "cheerio": "*" } }, "sha512-zAaImHWoh5RY2CLgU2mvg3bl2k3F65B0N5yphuII3ythFLPmJhL7sj1RDu6gSxcgqHlETbr/lhA2OBY+WF1fXQ=="], + + "@types/node": ["@types/node@25.7.0", "", { "dependencies": { "undici-types": "~7.21.0" } }, "sha512-z+pdZyxE+RTQE9AcboAZCb4otwcrvgHD+GlBpPgn0emDVt0ohrTMhAwlr2Wd9nZ+nihhYFxO2pThz3C5qSu2Eg=="], + + "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], + + "bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="], + + "cheerio": ["cheerio@1.2.0", "", { "dependencies": { "cheerio-select": "^2.1.0", "dom-serializer": "^2.0.0", 
"domhandler": "^5.0.3", "domutils": "^3.2.2", "encoding-sniffer": "^0.2.1", "htmlparser2": "^10.1.0", "parse5": "^7.3.0", "parse5-htmlparser2-tree-adapter": "^7.1.0", "parse5-parser-stream": "^7.1.2", "undici": "^7.19.0", "whatwg-mimetype": "^4.0.0" } }, "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg=="], + + "cheerio-select": ["cheerio-select@2.1.0", "", { "dependencies": { "boolbase": "^1.0.0", "css-select": "^5.1.0", "css-what": "^6.1.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.0.1" } }, "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g=="], + + "css-select": ["css-select@5.2.2", "", { "dependencies": { "boolbase": "^1.0.0", "css-what": "^6.1.0", "domhandler": "^5.0.2", "domutils": "^3.0.1", "nth-check": "^2.0.1" } }, "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw=="], + + "css-what": ["css-what@6.2.2", "", {}, "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA=="], + + "dom-serializer": ["dom-serializer@2.0.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.2", "entities": "^4.2.0" } }, "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg=="], + + "domelementtype": ["domelementtype@2.3.0", "", {}, "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw=="], + + "domhandler": ["domhandler@5.0.3", "", { "dependencies": { "domelementtype": "^2.3.0" } }, "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w=="], + + "domutils": ["domutils@3.2.2", "", { "dependencies": { "dom-serializer": "^2.0.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3" } }, "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw=="], + + "encoding-sniffer": ["encoding-sniffer@0.2.1", "", { 
"dependencies": { "iconv-lite": "^0.6.3", "whatwg-encoding": "^3.1.1" } }, "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw=="], + + "entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="], + + "htmlparser2": ["htmlparser2@10.1.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.2.2", "entities": "^7.0.1" } }, "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ=="], + + "iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], + + "nth-check": ["nth-check@2.1.1", "", { "dependencies": { "boolbase": "^1.0.0" } }, "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w=="], + + "parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="], + + "parse5-htmlparser2-tree-adapter": ["parse5-htmlparser2-tree-adapter@7.1.0", "", { "dependencies": { "domhandler": "^5.0.3", "parse5": "^7.0.0" } }, "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g=="], + + "parse5-parser-stream": ["parse5-parser-stream@7.1.2", "", { "dependencies": { "parse5": "^7.0.0" } }, "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow=="], + + "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici": 
["undici@7.25.0", "", {}, "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ=="], + + "undici-types": ["undici-types@7.21.0", "", {}, "sha512-w9IMgQrz4O0YN1LtB7K5P63vhlIOvC7opSmouCJ+ZywlPAlO9gIkJ+otk6LvGpAs2wg4econaCz3TvQ9xPoyuQ=="], + + "whatwg-encoding": ["whatwg-encoding@3.1.1", "", { "dependencies": { "iconv-lite": "0.6.3" } }, "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ=="], + + "whatwg-mimetype": ["whatwg-mimetype@4.0.0", "", {}, "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg=="], + + "htmlparser2/entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], + + "parse5/entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], + } +} diff --git a/config/default.ts b/config/default.ts new file mode 100644 index 0000000..a60b336 --- /dev/null +++ b/config/default.ts @@ -0,0 +1,35 @@ +export interface DayUrls { + monday: string + tuesday: string + wednesday: string + thursday: string + friday: string +} + +export interface TelegramConfig { + botToken: string + chatId: string +} + +export interface Config { + days: DayUrls + devMode: boolean + telegram: TelegramConfig +} + +const config: Config = { + days: { + monday: "", + tuesday: "", + wednesday: "", + thursday: "", + friday: "", + }, + devMode: false, + telegram: { + botToken: "", + chatId: "", + }, +} + +export default config diff --git a/config/index.ts b/config/index.ts new file mode 100644 index 0000000..837dfd1 --- /dev/null +++ b/config/index.ts @@ -0,0 +1,20 @@ +import defaultConfig from "./default" + +let localConfig: Partial = {} + +try { + const local = await import("./local") + localConfig = local.default +} catch { + // no local config, that's fine +} + +const config = { + ...defaultConfig, + 
...localConfig, + days: { ...defaultConfig.days, ...localConfig.days }, + telegram: { ...defaultConfig.telegram, ...localConfig.telegram }, +} + +export default config +export type { Config, DayUrls, TelegramConfig } from "./default" diff --git a/package.json b/package.json new file mode 100644 index 0000000..131be18 --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "name": "ogaracheck", + "module": "src/index.ts", + "type": "module", + "private": true, + "scripts": { + "start": "bun run src/index.ts", + "dev": "DEV_MODE=true bun run src/index.ts" + }, + "devDependencies": { + "@types/bun": "latest", + "@types/cheerio": "^1.0.0" + }, + "peerDependencies": { + "typescript": "^5" + }, + "dependencies": { + "cheerio": "^1.2.0" + } +} diff --git a/src/fetcher.ts b/src/fetcher.ts new file mode 100644 index 0000000..1b87161 --- /dev/null +++ b/src/fetcher.ts @@ -0,0 +1,112 @@ +import fs from "fs/promises" +import path from "path" + +interface RequestLog { + request: { + url: string + method: string + headers: Record + body: string | null + } + redirects: Array<{ status: number; location: string }> + response: { + status: number + headers: Record + cookies: Record + body: string + } +} + +let requestCounter = 0 +const cookieJar: Record = {} + +function parseCookies(headers: Headers): Record { + const cookies: Record = {} + const setCookieHeader = headers.getSetCookie?.() ?? 
[] + for (const raw of setCookieHeader) { + const [pair] = raw.split(";") + const [name, ...rest] = pair.split("=") + cookies[name.trim()] = rest.join("=").trim() + } + return cookies +} + +function cookieHeader(): string { + return Object.entries(cookieJar) + .map(([k, v]) => `${k}=${v}`) + .join("; ") +} + +async function saveLog(log: RequestLog, url: string): Promise { + await fs.mkdir("debug", { recursive: true }) + const index = String(++requestCounter).padStart(3, "0") + const slug = new URL(url).hostname.replace(/\./g, "_") + const file = path.join("debug", `${index}_${log.request.method}_${slug}.json`) + await fs.writeFile(file, JSON.stringify(log, null, 2)) +} + +export async function fetchPage( + url: string, + devMode: boolean, + options: RequestInit = {} +): Promise<{ status: number; body: string; finalUrl: string }> { + const method = (options.method ?? "GET").toUpperCase() + const cookie = cookieHeader() + const requestHeaders: Record = { + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/124.0 Safari/537.36", + ...(cookie ? { Cookie: cookie } : {}), + ...(options.headers as Record ?? {}), + } + + const log: RequestLog = { + request: { + url, + method, + headers: requestHeaders, + body: options.body ? String(options.body) : null, + }, + redirects: [], + response: { status: 0, headers: {}, cookies: {}, body: "" }, + } + + let currentUrl = url + let response: Response | null = null + + // manual redirect tracking so we can log the chain + for (let i = 0; i < 10; i++) { + response = await fetch(currentUrl, { + ...options, + headers: requestHeaders, + redirect: "manual", + }) + + const newCookies = parseCookies(response.headers) + Object.assign(cookieJar, newCookies) + + if (response.status >= 300 && response.status < 400) { + const location = response.headers.get("location") ?? 
"" + log.redirects.push({ status: response.status, location }) + currentUrl = new URL(location, currentUrl).href + continue + } + + break + } + + const body = await response!.text() + const responseHeaders: Record = {} + response!.headers.forEach((v, k) => { responseHeaders[k] = v }) + + log.response = { + status: response!.status, + headers: responseHeaders, + cookies: { ...cookieJar }, + body, + } + + if (devMode) { + await saveLog(log, url) + } + + return { status: response!.status, body, finalUrl: currentUrl } +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..84969f3 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,54 @@ +import config from "../config" +import { fetchPage } from "./fetcher" +import { parseMenu } from "./parser" +import { sendMessage } from "./telegram" + +type Weekday = "monday" | "tuesday" | "wednesday" | "thursday" | "friday" + +const WEEKDAYS: Weekday[] = ["monday", "tuesday", "wednesday", "thursday", "friday"] + +function todayWeekday(): Weekday | null { + const day = new Date().getDay() // 0=Sun, 1=Mon, ..., 5=Fri, 6=Sat + if (day === 0 || day === 6) return null + return WEEKDAYS[day - 1] +} + +async function main() { + const weekday = todayWeekday() + + if (!weekday) { + console.log("Weekend — nothing to do.") + return + } + + const url = config.days[weekday] + if (!url) { + console.error(`No URL configured for ${weekday}.`) + process.exit(1) + } + + console.log(`Fetching menu for ${weekday}: ${url}`) + + const { status, body } = await fetchPage(url, config.devMode) + console.log(`Response status: ${status}`) + + const result = parseMenu(body) + + if (!result.valid) { + console.log("Restaurant appears closed today.") + if (config.devMode) console.log("Raw preview:", result.raw) + return + } + + const message = JSON.stringify(result, null, 2) + console.log("Menu:", message) + + if (config.telegram.botToken && config.telegram.chatId) { + await sendMessage(config.telegram.botToken, config.telegram.chatId, `
${message}
`) + console.log("Sent to Telegram.") + } else { + console.log("Telegram not configured, skipping send.") + } +} + +main().catch(console.error) diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..4858235 --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,19 @@ +import * as cheerio from "cheerio" + +export interface MenuResult { + valid: boolean + address?: string + items?: string[] + raw?: string +} + +export function parseMenu(html: string): MenuResult { + const $ = cheerio.load(html) + + // TODO: implement actual selectors once we see the page structure + // For now returns raw body text so we can inspect it in dev mode + return { + valid: false, + raw: $("body").text().trim().slice(0, 500), + } +} diff --git a/src/telegram.ts b/src/telegram.ts new file mode 100644 index 0000000..caa3e2a --- /dev/null +++ b/src/telegram.ts @@ -0,0 +1,12 @@ +export async function sendMessage(botToken: string, chatId: string, text: string): Promise { + const url = `https://api.telegram.org/bot${botToken}/sendMessage` + const res = await fetch(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ chat_id: chatId, text, parse_mode: "HTML" }), + }) + if (!res.ok) { + const err = await res.text() + throw new Error(`Telegram error ${res.status}: ${err}`) + } +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..b2e7497 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,30 @@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + "types": ["bun"], + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, 
+ + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +}