This commit is contained in:
Tobias Klemp
2025-11-02 20:40:27 +01:00
commit e7bd070b3e
67 changed files with 3205 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
import { Effect } from 'effect'
import type { Page } from 'puppeteer'
import { getCapacityVariations } from '../variations/capacity'
import { getSelected } from '.'
/**
 * Reads the label of the capacity option that is currently selected.
 *
 * The capacity variations are loaded from the page and handed to
 * `getSelected`; the `label` of the variation it yields is returned.
 * Failures of either step propagate unchanged.
 */
export const getCapacity = (page: Page) =>
  Effect.gen(function* () {
    const options = yield* getCapacityVariations(page)
    const { label } = yield* getSelected(options)
    return label
  })

View File

@@ -0,0 +1,13 @@
import { Effect } from 'effect'
import type { Page } from 'puppeteer'
import { getSelected } from '.'
import { getColorVariations } from '../variations/color'
/**
 * Reads the label of the color option that is currently selected.
 *
 * The color variations are loaded from the page and handed to `getSelected`;
 * the `label` of the variation it yields is returned. Failures of either
 * step propagate unchanged.
 */
export const getColor = (page: Page) =>
  Effect.gen(function* () {
    const options = yield* getColorVariations(page)
    const selectedColor = yield* getSelected(options)
    return selectedColor.label
  })

View File

@@ -0,0 +1,49 @@
import { Data, Effect } from 'effect'
import type { Page } from 'puppeteer'
import { CrawlerElementNotFoundError } from '../..'
// CSS selector for the element whose inner text getCondition reads and parses.
const PRODUCT_CONDITION_SELECTOR =
'#dynamic_content > div.modell_page_grid > div.modell_page_name > div.modell_page_name_container > div'
/**
 * Extracts the product condition from the page.
 *
 * The inner text of the element matched by `PRODUCT_CONDITION_SELECTOR` is
 * read inside the page. The first occurrence of the literal `Akku: Neu` is
 * removed, the remainder is split on `:` and the second segment, trimmed, is
 * the result.
 *
 * Fails with `CrawlerElementNotFoundError` when the in-page read rejects
 * (which includes the element being absent), and with `ParseConditionError`
 * when the second segment is missing or empty.
 */
export const getCondition = (page: Page) =>
  Effect.gen(function* () {
    const rawText = yield* Effect.tryPromise({
      try: () =>
        page.evaluate((selector) => {
          const conditionDiv: HTMLDivElement | null =
            document.querySelector(selector)
          if (!conditionDiv) {
            throw new Error('Could not find product condition')
          }
          return conditionDiv.innerText
        }, PRODUCT_CONDITION_SELECTOR),
      catch: (cause) =>
        new CrawlerElementNotFoundError({
          message: 'Could not parse product condition',
          cause,
        }),
    })

    // Segment 0 is the text before the first colon; segment 1 is the value.
    const segments = rawText.replace('Akku: Neu', '').split(':')
    const condition = segments[1]?.trim()
    if (condition) {
      return condition
    }

    return yield* Effect.fail(
      new ParseConditionError({
        message: 'Could not parse condition from condition string',
      }),
    )
  })
/**
 * Tagged error (`ParseConditionError`) carrying an optional human-readable
 * `message` and an optional underlying `cause`.
 */
export class ParseConditionError extends Data.TaggedError(
  'ParseConditionError',
)<{ message?: string; cause?: unknown }> {}

View File

@@ -0,0 +1,130 @@
import { Data, Effect } from 'effect'
import type { Page } from 'puppeteer'
// CSS selector for the button that openDeviceModal clicks.
const OPEN_MODAL_BUTTON_SELECTOR =
'#dynamic_content > div.modell_page_grid > div.modell_page_filter_and_info > div.show_single_container > div.show_single_text > button'
// CSS selector for the device list container that is polled for after the
// click and later parsed by getDevices.
const DEVICE_LIST_SELECTOR =
'#popup-singles > div.popup_box > div > div.single_item_list.js_single_item_list'
/**
 * Tagged error (`ParseDeviceModalError`) carrying an optional human-readable
 * `message` and an optional underlying `cause`.
 */
export class ParseDeviceModalError extends Data.TaggedError(
  'ParseDeviceModalError',
)<{ message?: string; cause?: unknown }> {}
/**
 * Tagged error (`OpenDeviceModalError`) carrying an optional human-readable
 * `message` and an optional underlying `cause`.
 */
export class OpenDeviceModalError extends Data.TaggedError(
  'OpenDeviceModalError',
)<{ message?: string; cause?: unknown }> {}
/**
 * Clicks the button that opens the device modal and waits, inside the page,
 * for the device list element to exist.
 *
 * The list is looked up at most 200 times with a 50 ms pause after every
 * miss. When the open button itself is absent the in-page function returns
 * immediately without clicking or waiting, and this effect still succeeds.
 *
 * Any rejection of the in-page function (including the list never showing
 * up) is reported as `OpenDeviceModalError`.
 */
const openDeviceModal = (page: Page) =>
  Effect.gen(function* () {
    // The in-page function resolves to the list's child count; it is not
    // needed here, so the result is discarded.
    yield* Effect.tryPromise({
      try: () =>
        page.evaluate(
          async (listSelector, buttonSelector) => {
            const openButton: HTMLButtonElement | null =
              document.querySelector(buttonSelector)
            if (!openButton) return
            openButton.click()

            let list: HTMLDivElement | null = null
            let attemptsLeft = 200
            while (attemptsLeft > 0) {
              attemptsLeft -= 1
              list = document.querySelector(listSelector)
              if (list) {
                break
              }
              await new Promise((resolve) => setTimeout(resolve, 50))
            }

            if (!list) {
              throw new Error('List not found')
            }
            return list.children.length
          },
          DEVICE_LIST_SELECTOR,
          OPEN_MODAL_BUTTON_SELECTOR,
        ),
      catch: (cause) =>
        new OpenDeviceModalError({
          message: `Unable to open device modal: ${cause}`,
          cause,
        }),
    })
  })
/**
 * Opens the device modal and parses every listed device into a plain record.
 *
 * For each direct `div` child of the device list the result contains:
 * - `details`: inner text of every `.inline_list` element,
 * - `batteryPercentage`: digits directly before `%` on the first line of the
 *   `.extra_information_list` text (0 when absent),
 * - `imageCount`: first run of digits on the second line (0 when absent),
 * - `originalPagacking`: the third line verbatim (`''` when absent).
 *
 * Fails with `OpenDeviceModalError` when the modal cannot be opened and with
 * `ParseDeviceModalError` when the in-page parsing rejects (which includes
 * the list element being absent).
 */
export const getDevices = (page: Page) =>
  Effect.gen(function* () {
    yield* openDeviceModal(page)
    return yield* Effect.tryPromise({
      try: () =>
        page.evaluate((DEVICE_LIST_SELECTOR) => {
          const deviceListDiv: HTMLDivElement | null =
            document.querySelector(DEVICE_LIST_SELECTOR)
          if (!deviceListDiv) {
            throw new Error('Unable to find device list')
          }

          // Declared inside the callback on purpose: the callback is executed
          // in the page, so it must not rely on helpers from the outer module.
          const parseDevice = (element: HTMLDivElement) => {
            const detailListElements: HTMLLIElement[] = Array.from(
              element.querySelectorAll('.inline_list'),
            )
            const details = detailListElements.map((li) => li.innerText)

            const extraBarDiv: HTMLDivElement | null = element.querySelector(
              '.extra_information_list',
            )
            // Destructuring defaults keep every field a string even when the
            // bar is missing or has fewer than three lines; a plain
            // destructuring assignment would overwrite them with `undefined`.
            const [
              batteryStatusString = '',
              imagesCountString = '',
              originalPagacking = '',
            ] = extraBarDiv?.innerText.split('\n') ?? []

            const imageCount = parseInt(
              imagesCountString.match(/\d+/)?.[0] ?? '0',
              10,
            )
            const batteryPercentage = parseInt(
              batteryStatusString.match(/\d+(?=%)/)?.[0] ?? '0',
              10,
            )

            return { details, originalPagacking, imageCount, batteryPercentage }
          }

          const deviceDivs: HTMLDivElement[] = Array.from(
            deviceListDiv.querySelectorAll(':scope > div'),
          )
          return deviceDivs.map((device) => parseDevice(device))
        }, DEVICE_LIST_SELECTOR),
      catch: (cause) =>
        new ParseDeviceModalError({
          message: `Unable to parse device list: ${cause}`,
          cause,
        }),
    })
  })

View File

@@ -0,0 +1,93 @@
import { Data, Effect } from 'effect'
import { getPrice } from './price'
import type { Page } from 'puppeteer'
import { getProductName } from './productName'
import { getCondition } from './condition'
import { getCapacity } from './capacity'
import type { Variation } from '../variations'
import { getColor } from './color'
import { getSim } from './sim'
import { getStockLevel } from './stockLevel'
import { getDevices } from './devices'
/**
 * Tagged error (`ExtractSelectedVariationError`) carrying an optional
 * human-readable `message` and an optional underlying `cause`.
 */
export class ExtractSelectedVariationError extends Data.TaggedError(
  'ExtractSelectedVariationError',
)<{ message?: string; cause?: unknown }> {}
/**
 * Yields the first variation whose `selected` flag is truthy.
 *
 * Fails with `ExtractSelectedVariationError` when no variation qualifies,
 * which is also the case for an empty list.
 */
export const getSelected = (variations: Variation[]) =>
  Effect.gen(function* () {
    const match = variations.find(({ selected }) => selected)
    if (match) {
      return match
    }

    return yield* Effect.fail(
      new ExtractSelectedVariationError({
        message: 'No selected variation option found',
      }),
    )
  })
/**
 * Logs `name: value` at info level.
 *
 * Values whose `typeof` is `'object'` (this includes `null`) are rendered
 * with `JSON.stringify`; everything else is interpolated as-is.
 */
const log = (value: unknown, name: string) =>
  Effect.gen(function* () {
    const rendered =
      typeof value === 'object' ? JSON.stringify(value) : `${value}`
    yield* Effect.logInfo(`${name}: ${rendered}`)
  })
/**
 * Aggregate result of `getPageData`: one field per extractor that is run
 * against the product page.
 */
export type PageData = {
// Result of getPrice.
price: number
// Result of getProductName.
productName: string
// Result of getCondition.
condition: string
// Result of getCapacity.
capacity: string
// Result of getColor.
color: string
// Result of getSim.
sim: string
// Result of getStockLevel.
stockLevel: string
// Result of getDevices.
devices: DeviceData[]
}
/** Shape of a single entry in `PageData.devices`. */
export type DeviceData = {
// Free-text detail strings for the device.
details: string[]
// NOTE(review): the name looks like a misspelling of "originalPackaging";
// it is part of the exported shape, so renaming it would break consumers.
originalPagacking: string
// Number of images for the device.
imageCount: number
// Battery level as a percentage number.
batteryPercentage: number
}
/**
 * Runs every extractor against the page, one after another, and bundles the
 * results into a `PageData` record.
 *
 * The extractors run in the order price, product name, condition, capacity,
 * color, sim, stock level, devices. The product name is additionally logged
 * at info level. The first failing extractor aborts the whole effect with
 * its own error.
 */
export const getPageData = (page: Page) =>
  Effect.gen(function* () {
    // Object literal properties are evaluated top to bottom, so the page is
    // queried in exactly the order the fields are listed.
    const pageData: PageData = {
      price: yield* getPrice(page),
      productName: yield* getProductName(page).pipe(
        Effect.tap((name) => log(name, 'Product Name')),
      ),
      condition: yield* getCondition(page),
      capacity: yield* getCapacity(page),
      color: yield* getColor(page),
      sim: yield* getSim(page),
      stockLevel: yield* getStockLevel(page),
      devices: yield* getDevices(page),
    }
    return pageData
  })

View File

@@ -0,0 +1,57 @@
import { Data, Effect } from 'effect'
import type { Page } from 'puppeteer'
import { CrawlerElementNotFoundError } from '../..'
/**
 * Tagged error (`ParsePriceError`) carrying an optional human-readable
 * `message` and an optional underlying `cause`.
 */
export class ParsePriceError extends Data.TaggedError('ParsePriceError')<{
  message?: string
  cause?: unknown
}> {}
// CSS selector for the span whose inner text getPrice reads as the price.
const PRICE_SELECTOR =
'#js-buybox-container > div > div.buybox_price_container > div > span.js_adjust_main_price'
/**
 * Reads the price text from the page and converts it to a number.
 *
 * The inner text of the element matched by `PRICE_SELECTOR` is read inside
 * the page and then passed through `toNumber`.
 *
 * Fails with `CrawlerElementNotFoundError` when the in-page read rejects
 * (which includes the element being absent), or with whatever `toNumber`
 * fails with.
 */
export const getPrice = (page: Page) =>
  Effect.gen(function* () {
    const rawPrice = yield* Effect.tryPromise({
      try: () =>
        page.evaluate((selector) => {
          const priceElement: HTMLSpanElement | null =
            document.querySelector(selector)
          if (!priceElement) {
            throw new Error('Could not find price span')
          }
          return priceElement.innerText
        }, PRICE_SELECTOR),
      catch: (cause) =>
        new CrawlerElementNotFoundError({
          message: 'Could not parse price',
          cause,
        }),
    })
    return yield* toNumber(rawPrice)
  })
/**
 * Reduces a displayed price to a string `parseFloat` understands.
 *
 * Everything except digits, `,`, `.` and `-` is dropped. Then:
 * - both `,` and `.` present: the one occurring last is the decimal
 *   separator, the other is a grouping separator and is removed
 *   (`1.299,00` -> `1299.00`, `1,299.00` -> `1299.00`);
 * - a single `,`: treated as the decimal separator (`12,50` -> `12.50`);
 * - several of the same separator and none of the other: all of them are
 *   grouping separators (`1.299.000` -> `1299000`);
 * - a single `.`: left untouched.
 */
const normalizePriceString = (priceString: string): string => {
  const cleaned = priceString.replace(/[^\d,.-]/g, '')
  const firstComma = cleaned.indexOf(',')
  const lastComma = cleaned.lastIndexOf(',')
  const firstDot = cleaned.indexOf('.')
  const lastDot = cleaned.lastIndexOf('.')

  if (lastComma !== -1 && lastDot !== -1) {
    return lastComma > lastDot
      ? cleaned.replace(/\./g, '').replace(',', '.')
      : cleaned.replace(/,/g, '')
  }
  if (lastComma !== -1) {
    return firstComma === lastComma
      ? cleaned.replace(',', '.')
      : cleaned.replace(/,/g, '')
  }
  return firstDot === lastDot ? cleaned : cleaned.replace(/\./g, '')
}

/**
 * Parses a displayed price such as `1.299,00 €` into a number.
 *
 * Fails with `ParsePriceError` when the normalized text does not start with
 * something `parseFloat` can read (for example an empty string).
 */
const toNumber = (priceString: string) =>
  Effect.gen(function* () {
    const parsed = parseFloat(normalizePriceString(priceString))
    if (Number.isNaN(parsed)) {
      return yield* Effect.fail(
        new ParsePriceError({
          message: 'Could not parse price string',
          cause: 'Number is NaN',
        }),
      )
    }
    return parsed
  })

View File

@@ -0,0 +1,28 @@
import { Effect } from 'effect'
import type { Page } from 'puppeteer'
import { CrawlerElementNotFoundError } from '../..'
// CSS selector for the span whose inner text getProductName returns.
const PRODUCT_NAME_SELECTOR =
'#dynamic_content > div.modell_page_grid > div.modell_page_name > div.modell_page_name_container > h1 > span'
/**
 * Reads the product name from the page.
 *
 * Returns the inner text of the element matched by `PRODUCT_NAME_SELECTOR`.
 * Fails with `CrawlerElementNotFoundError` when the in-page read rejects,
 * which includes the element being absent.
 */
export const getProductName = (page: Page) =>
  Effect.gen(function* () {
    const productName = yield* Effect.tryPromise({
      try: () =>
        page.evaluate((selector) => {
          const nameElement: HTMLSpanElement | null =
            document.querySelector(selector)
          if (!nameElement) {
            throw new Error('Could not find product name')
          }
          return nameElement.innerText
        }, PRODUCT_NAME_SELECTOR),
      catch: (cause) =>
        new CrawlerElementNotFoundError({
          message: 'Could not parse product name',
          cause,
        }),
    })
    return productName
  })

View File

@@ -0,0 +1,17 @@
import { Effect } from 'effect'
import type { Page } from 'puppeteer'
import { getSelected } from '.'
import { getSimVariations } from '../variations/sim'
/**
 * Reads the label of the SIM option that is currently selected.
 *
 * When the page offers no SIM variations at all the result is the empty
 * string; otherwise the `label` of the variation yielded by `getSelected` is
 * returned. Failures of the variation lookup or of `getSelected` propagate
 * unchanged.
 */
export const getSim = (page: Page) =>
  Effect.gen(function* () {
    const options = yield* getSimVariations(page)
    if (options.length > 0) {
      const { label } = yield* getSelected(options)
      return label
    }
    return ''
  })

View File

@@ -0,0 +1,29 @@
import { Effect } from 'effect'
import type { Page } from 'puppeteer'
import { CrawlerElementNotFoundError } from '../..'
// CSS selector for the badge whose inner text getStockLevel returns.
const STOCK_LEVEL_BADGE_SELECTOR =
'.modellfilter_row > .modellfilter_head > .colored_label_wrapper'
/**
 * Reads the stock level text from the page.
 *
 * Returns the inner text of the first element matched by
 * `STOCK_LEVEL_BADGE_SELECTOR`. Fails with `CrawlerElementNotFoundError`
 * when the in-page read rejects, which includes the badge being absent.
 */
export const getStockLevel = (page: Page) =>
  Effect.gen(function* () {
    const stockLevel = yield* Effect.tryPromise({
      try: () =>
        page.evaluate((selector) => {
          const badge: HTMLDivElement | null = document.querySelector(selector)
          if (!badge) {
            throw new Error('Could not find stock level')
          }
          return badge.innerText
        }, STOCK_LEVEL_BADGE_SELECTOR),
      catch: (cause) =>
        new CrawlerElementNotFoundError({
          message: `Could not parse stock level, ${cause}`,
          cause,
        }),
    })
    return stockLevel
  })