init
This commit is contained in:
13
src/lib/server/crawler/clevertronik/data/capacity.ts
Normal file
13
src/lib/server/crawler/clevertronik/data/capacity.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { getCapacityVariations } from '../variations/capacity'
|
||||
import { getSelected } from '.'
|
||||
|
||||
/**
 * Reads the label of the capacity option that is currently selected.
 *
 * Loads the capacity variations for `page` and hands them to `getSelected`,
 * which picks the entry flagged as selected. A failure of either step is
 * propagated unchanged.
 *
 * @param page Puppeteer page the variations are read from.
 * @returns An effect succeeding with the selected variation's `label`.
 */
export const getCapacity = (page: Page) =>
  getCapacityVariations(page).pipe(
    Effect.flatMap((variations) => getSelected(variations)),
    Effect.map((chosen) => chosen.label),
  )
|
||||
13
src/lib/server/crawler/clevertronik/data/color.ts
Normal file
13
src/lib/server/crawler/clevertronik/data/color.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { getSelected } from '.'
|
||||
import { getColorVariations } from '../variations/color'
|
||||
|
||||
/**
 * Reads the label of the color option that is currently selected.
 *
 * Loads the color variations for `page` and hands them to `getSelected`,
 * which picks the entry flagged as selected. A failure of either step is
 * propagated unchanged.
 *
 * @param page Puppeteer page the variations are read from.
 * @returns An effect succeeding with the selected variation's `label`.
 */
export const getColor = (page: Page) =>
  getColorVariations(page).pipe(
    Effect.flatMap((variations) => getSelected(variations)),
    Effect.map((chosen) => chosen.label),
  )
|
||||
49
src/lib/server/crawler/clevertronik/data/condition.ts
Normal file
49
src/lib/server/crawler/clevertronik/data/condition.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { Data, Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { CrawlerElementNotFoundError } from '../..'
|
||||
|
||||
/** CSS selector of the element whose text is read by `getCondition`. */
const PRODUCT_CONDITION_SELECTOR =
  '#dynamic_content > div.modell_page_grid > div.modell_page_name > div.modell_page_name_container > div'

/**
 * Scrapes the product condition from the page.
 *
 * The element's text is expected to look like `<label>: <condition>`; the
 * trimmed part after the first colon is returned.
 *
 * Fails with `CrawlerElementNotFoundError` when the in-page lookup throws
 * (for example because the element is missing), and with
 * `ParseConditionError` when no non-empty text follows the first colon.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the condition text.
 */
export const getCondition = (page: Page) =>
  Effect.gen(function* () {
    const rawText = yield* Effect.tryPromise({
      try: () =>
        page.evaluate((selector) => {
          const node = document.querySelector<HTMLDivElement>(selector)

          if (node === null) {
            throw new Error('Could not find product condition')
          }

          return node.innerText
        }, PRODUCT_CONDITION_SELECTOR),
      catch: (cause) =>
        new CrawlerElementNotFoundError({
          message: 'Could not parse product condition',
          cause,
        }),
    })

    // Remove the first "Akku: Neu" occurrence before splitting — presumably so
    // that its colon is not mistaken for the condition separator.
    const withoutBatteryNote = rawText.replace('Akku: Neu', '')

    // Only the segment right after the first colon is of interest.
    const condition = withoutBatteryNote.split(':')[1]?.trim()

    if (!condition) {
      return yield* Effect.fail(
        new ParseConditionError({
          message: 'Could not parse condition from condition string',
        }),
      )
    }

    return condition
  })
|
||||
|
||||
/**
 * Tagged error (`_tag: 'ParseConditionError'`) that `getCondition` fails with
 * when the scraped condition text has no non-empty part after its first colon.
 * Both `cause` and `message` are optional.
 */
export class ParseConditionError extends Data.TaggedError(
|
||||
'ParseConditionError',
|
||||
)<{
|
||||
cause?: unknown
|
||||
message?: string
|
||||
}> {}
|
||||
130
src/lib/server/crawler/clevertronik/data/devices.ts
Normal file
130
src/lib/server/crawler/clevertronik/data/devices.ts
Normal file
@@ -0,0 +1,130 @@
|
||||
import { Data, Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
|
||||
// CSS selector of the button that `openDeviceModal` clicks to open the device modal.
const OPEN_MODAL_BUTTON_SELECTOR =
|
||||
'#dynamic_content > div.modell_page_grid > div.modell_page_filter_and_info > div.show_single_container > div.show_single_text > button'
|
||||
|
||||
// CSS selector of the device list container; polled by `openDeviceModal` and
// parsed by `getDevices`.
const DEVICE_LIST_SELECTOR =
|
||||
'#popup-singles > div.popup_box > div > div.single_item_list.js_single_item_list'
|
||||
|
||||
/**
 * Tagged error (`_tag: 'ParseDeviceModalError'`) that `getDevices` fails with
 * when the in-page extraction of the device list throws or rejects.
 * Both `cause` and `message` are optional.
 */
export class ParseDeviceModalError extends Data.TaggedError(
|
||||
'ParseDeviceModalError',
|
||||
)<{
|
||||
cause?: unknown
|
||||
message?: string
|
||||
}> {}
|
||||
|
||||
/**
 * Tagged error (`_tag: 'OpenDeviceModalError'`) that `openDeviceModal` fails
 * with when its in-page script throws or rejects, e.g. because the device list
 * never appeared after the button was clicked.
 * Both `cause` and `message` are optional.
 */
export class OpenDeviceModalError extends Data.TaggedError(
|
||||
'OpenDeviceModalError',
|
||||
)<{
|
||||
cause?: unknown
|
||||
message?: string
|
||||
}> {}
|
||||
|
||||
/**
 * Clicks the button that opens the device modal and waits, inside the page,
 * until the device list element exists.
 *
 * When the button is not on the page the in-page script returns right away and
 * this effect still succeeds. When the list has not appeared after 200 checks
 * spaced 50 ms apart, the script throws and the effect fails with
 * `OpenDeviceModalError`.
 *
 * @param page Puppeteer page to interact with.
 * @returns An effect that succeeds with no value.
 */
const openDeviceModal = (page: Page) =>
  Effect.gen(function* () {
    // The script's result (child count of the list) is not needed by callers.
    yield* Effect.tryPromise({
      try: () =>
        page.evaluate(
          async (listSelector, buttonSelector) => {
            const trigger =
              document.querySelector<HTMLButtonElement>(buttonSelector)

            if (trigger === null) return

            trigger.click()

            let list: HTMLDivElement | null = null

            // Poll for the list: at most 200 lookups, pausing 50 ms after each miss.
            for (let attemptsLeft = 200; attemptsLeft > 0; attemptsLeft--) {
              list = document.querySelector(listSelector)

              if (list !== null) {
                break
              }

              await new Promise((resolve) => setTimeout(resolve, 50))
            }

            if (list === null) {
              throw new Error('List not found')
            }

            return list.children.length
          },
          DEVICE_LIST_SELECTOR,
          OPEN_MODAL_BUTTON_SELECTOR,
        ),
      catch: (cause) =>
        new OpenDeviceModalError({
          message: `Unable to open device modal: ${cause}`,
          cause,
        }),
    })
  })
|
||||
|
||||
/**
 * Opens the device modal and extracts one record per listed device.
 *
 * For every direct `div` child of the device list the in-page script collects:
 * - `details`: the text of each `.inline_list` element,
 * - `batteryPercentage`: the number preceding `%` on the first line of the
 *   `.extra_information_list` text (0 when absent),
 * - `imageCount`: the first number on the second line (0 when absent),
 * - `originalPagacking`: the third line verbatim (`''` when absent).
 *
 * Fails with `OpenDeviceModalError` when the modal cannot be opened and with
 * `ParseDeviceModalError` when the in-page extraction throws.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the array of device records.
 */
export const getDevices = (page: Page) =>
  Effect.gen(function* () {
    yield* openDeviceModal(page)

    return yield* Effect.tryPromise({
      try: () =>
        page.evaluate((listSelector) => {
          const deviceListDiv =
            document.querySelector<HTMLDivElement>(listSelector)

          if (!deviceListDiv) {
            throw new Error('Unable to find device list')
          }

          const parseDevice = (element: HTMLDivElement) => {
            const details = Array.from(
              element.querySelectorAll<HTMLLIElement>('.inline_list'),
              (li) => li.innerText,
            )

            const extraBarDiv = element.querySelector<HTMLDivElement>(
              '.extra_information_list',
            )

            // Destructuring defaults matter here: when the bar has fewer than
            // three lines the missing entries would otherwise be `undefined`,
            // and `originalPagacking` would be dropped from the serialized
            // result although it is declared as a string.
            const [
              batteryStatusString = '',
              imagesCountString = '',
              originalPagacking = '',
            ] = extraBarDiv?.innerText.split('\n') ?? []

            // An empty or number-less line falls back to '0'.
            const imageCount = Number.parseInt(
              imagesCountString.match(/\d+/)?.[0] ?? '0',
              10,
            )

            // Only digits directly followed by '%' count as the battery value.
            const batteryPercentage = Number.parseInt(
              batteryStatusString.match(/\d+(?=%)/)?.[0] ?? '0',
              10,
            )

            return { details, originalPagacking, imageCount, batteryPercentage }
          }

          const deviceDivs = Array.from(
            deviceListDiv.querySelectorAll<HTMLDivElement>(':scope > div'),
          )

          return deviceDivs.map((device) => parseDevice(device))
        }, DEVICE_LIST_SELECTOR),
      catch: (cause) =>
        new ParseDeviceModalError({
          message: `Unable to parse device list: ${cause}`,
          cause,
        }),
    })
  })
|
||||
93
src/lib/server/crawler/clevertronik/data/index.ts
Normal file
93
src/lib/server/crawler/clevertronik/data/index.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import { Data, Effect } from 'effect'
|
||||
import { getPrice } from './price'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { getProductName } from './productName'
|
||||
import { getCondition } from './condition'
|
||||
import { getCapacity } from './capacity'
|
||||
import type { Variation } from '../variations'
|
||||
import { getColor } from './color'
|
||||
import { getSim } from './sim'
|
||||
import { getStockLevel } from './stockLevel'
|
||||
import { getDevices } from './devices'
|
||||
|
||||
/**
 * Tagged error (`_tag: 'ExtractSelectedVariationError'`) that `getSelected`
 * fails with when none of the given variations is marked as selected.
 * Both `cause` and `message` are optional.
 */
export class ExtractSelectedVariationError extends Data.TaggedError(
|
||||
'ExtractSelectedVariationError',
|
||||
)<{
|
||||
cause?: unknown
|
||||
message?: string
|
||||
}> {}
|
||||
|
||||
/**
 * Picks the first variation whose `selected` flag is truthy.
 *
 * @param variations Candidate variations, searched in order.
 * @returns An effect succeeding with the matching variation, or failing with
 *   `ExtractSelectedVariationError` when there is none.
 */
export const getSelected = (variations: Variation[]) =>
  Effect.gen(function* () {
    for (const candidate of variations) {
      if (candidate.selected) {
        return candidate
      }
    }

    return yield* Effect.fail(
      new ExtractSelectedVariationError({
        message: 'No selected variation option found',
      }),
    )
  })
|
||||
|
||||
/**
 * Logs `name: value` at info level.
 *
 * Values for which `typeof` reports `'object'` (objects, arrays and `null`)
 * are rendered with `JSON.stringify`; everything else is interpolated as is.
 *
 * @param value Value to log.
 * @param name Label placed in front of the value.
 */
const log = (value: unknown, name: string) =>
  // `suspend` defers the formatting until the effect actually runs.
  Effect.suspend(() => {
    const rendered = typeof value === 'object' ? JSON.stringify(value) : value

    return Effect.logInfo(`${name}: ${rendered}`)
  })
|
||||
|
||||
/**
 * Everything `getPageData` collects from a single product page: the parsed
 * numeric price, the product name, condition and stock level texts, the labels
 * of the selected capacity / color / SIM variations (SIM is `''` when the page
 * offers no SIM variations) and the devices listed in the device modal.
 */
export type PageData = {
|
||||
price: number
|
||||
productName: string
|
||||
condition: string
|
||||
capacity: string
|
||||
color: string
|
||||
sim: string
|
||||
stockLevel: string
|
||||
devices: DeviceData[]
|
||||
}
|
||||
|
||||
/**
 * One device entry as produced by `getDevices`: the texts of its detail list,
 * the third line of its extra-information bar (`originalPagacking`), the number
 * of images and the battery percentage (both 0 when not found).
 *
 * NOTE(review): `originalPagacking` looks like a misspelling of "packaging";
 * the name is part of this shape and of the `getDevices` result, so both must
 * be renamed together if it is ever corrected.
 */
export type DeviceData = {
|
||||
details: string[]
|
||||
originalPagacking: string
|
||||
imageCount: number
|
||||
batteryPercentage: number
|
||||
}
|
||||
|
||||
/**
 * Scrapes all supported fields from a product page into one `PageData` record.
 *
 * The individual scrapers run one after another in the order the fields are
 * listed below (devices last); the first failure aborts the whole effect.
 * The product name is additionally logged at info level.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the assembled `PageData`.
 */
export const getPageData = (page: Page) =>
  Effect.gen(function* () {
    // Property initialisers are evaluated top to bottom, so the scrapers
    // still run strictly in sequence.
    const pageData: PageData = {
      price: yield* getPrice(page),
      productName: yield* getProductName(page).pipe(
        Effect.tap((val) => log(val, 'Product Name')),
      ),
      condition: yield* getCondition(page),
      capacity: yield* getCapacity(page),
      color: yield* getColor(page),
      sim: yield* getSim(page),
      stockLevel: yield* getStockLevel(page),
      devices: yield* getDevices(page),
    }

    return pageData
  })
|
||||
57
src/lib/server/crawler/clevertronik/data/price.ts
Normal file
57
src/lib/server/crawler/clevertronik/data/price.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { Data, Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { CrawlerElementNotFoundError } from '../..'
|
||||
|
||||
/**
 * Tagged error (`_tag: 'ParsePriceError'`) that the price parser fails with
 * when the scraped price text does not yield a number.
 * Both `cause` and `message` are optional.
 */
export class ParsePriceError extends Data.TaggedError('ParsePriceError')<{
|
||||
cause?: unknown
|
||||
message?: string
|
||||
}> {}
|
||||
|
||||
/** CSS selector of the span whose text is read by `getPrice`. */
const PRICE_SELECTOR =
  '#js-buybox-container > div > div.buybox_price_container > div > span.js_adjust_main_price'

/**
 * Scrapes the price text from the page and converts it to a number.
 *
 * Fails with `CrawlerElementNotFoundError` when the in-page lookup throws
 * (for example because the span is missing) and with `ParsePriceError` when
 * the text cannot be converted by `toNumber`.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the numeric price.
 */
export const getPrice = (page: Page) =>
  Effect.tryPromise({
    try: () =>
      page.evaluate((selector) => {
        const priceSpan = document.querySelector<HTMLSpanElement>(selector)

        if (priceSpan === null) {
          throw new Error('Could not find price span')
        }

        return priceSpan.innerText
      }, PRICE_SELECTOR),
    catch: (cause) =>
      new CrawlerElementNotFoundError({
        message: 'Could not parse price',
        cause,
      }),
  }).pipe(Effect.flatMap(toNumber))
|
||||
|
||||
/**
 * Converts a displayed price such as `"1.299,00 €"` into a number.
 *
 * Everything except digits, `,`, `.` and `-` is discarded. When the remaining
 * text contains a comma it is treated as the decimal mark and every dot as a
 * thousands separator; without a comma the text is parsed as is.
 *
 * @param priceString Raw price text as scraped from the page.
 * @returns An effect succeeding with the parsed number, or failing with
 *   `ParsePriceError` when the text does not yield a number.
 */
const toNumber = (priceString: string) =>
  Effect.gen(function* () {
    // Strip currency symbols, whitespace etc.; keep digits, separators and sign.
    const cleaned = priceString.replace(/[^\d,.-]/g, '')

    // With a decimal comma present, dots can only be grouping separators.
    // Leaving them in would turn "1.299,00" into "1.299.00", which parseFloat
    // reads as 1.299 because it stops at the second dot.
    const normalized = cleaned.includes(',')
      ? cleaned.replace(/\./g, '').replace(',', '.')
      : cleaned

    const parsed = Number.parseFloat(normalized)

    if (Number.isNaN(parsed)) {
      return yield* Effect.fail(
        new ParsePriceError({
          message: 'Could not parse price string',
          cause: 'Number is NaN',
        }),
      )
    }

    return parsed
  })
|
||||
28
src/lib/server/crawler/clevertronik/data/productName.ts
Normal file
28
src/lib/server/crawler/clevertronik/data/productName.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { CrawlerElementNotFoundError } from '../..'
|
||||
|
||||
/** CSS selector of the span whose text is read by `getProductName`. */
const PRODUCT_NAME_SELECTOR =
  '#dynamic_content > div.modell_page_grid > div.modell_page_name > div.modell_page_name_container > h1 > span'

/**
 * Scrapes the product name from the page.
 *
 * Fails with `CrawlerElementNotFoundError` when the in-page lookup throws,
 * for example because the span is missing.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the span's `innerText`.
 */
export const getProductName = (page: Page) =>
  Effect.tryPromise({
    try: () =>
      page.evaluate((selector) => {
        const nameSpan = document.querySelector<HTMLSpanElement>(selector)

        if (nameSpan === null) {
          throw new Error('Could not find product name')
        }

        return nameSpan.innerText
      }, PRODUCT_NAME_SELECTOR),
    catch: (cause) =>
      new CrawlerElementNotFoundError({
        message: 'Could not parse product name',
        cause,
      }),
  })
|
||||
17
src/lib/server/crawler/clevertronik/data/sim.ts
Normal file
17
src/lib/server/crawler/clevertronik/data/sim.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import { Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { getSelected } from '.'
|
||||
import { getSimVariations } from '../variations/sim'
|
||||
|
||||
/**
 * Reads the label of the SIM option that is currently selected.
 *
 * When the page exposes no SIM variations at all, the effect succeeds with an
 * empty string instead of failing. Otherwise the selected variation is picked
 * via `getSelected`, whose failure is propagated unchanged.
 *
 * @param page Puppeteer page the variations are read from.
 * @returns An effect succeeding with the selected label or `''`.
 */
export const getSim = (page: Page) =>
  Effect.gen(function* () {
    const options = yield* getSimVariations(page)

    if (options.length > 0) {
      const { label } = yield* getSelected(options)

      return label
    }

    // Nothing to choose from: report an empty label.
    return ''
  })
|
||||
29
src/lib/server/crawler/clevertronik/data/stockLevel.ts
Normal file
29
src/lib/server/crawler/clevertronik/data/stockLevel.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { Effect } from 'effect'
|
||||
import type { Page } from 'puppeteer'
|
||||
import { CrawlerElementNotFoundError } from '../..'
|
||||
|
||||
/** CSS selector of the badge element whose text is read by `getStockLevel`. */
const STOCK_LEVEL_BADGE_SELECTOR =
  '.modellfilter_row > .modellfilter_head > .colored_label_wrapper'

/**
 * Scrapes the stock level badge text from the page.
 *
 * Fails with `CrawlerElementNotFoundError` (whose message includes the
 * stringified cause) when the in-page lookup throws, for example because the
 * badge is missing.
 *
 * @param page Puppeteer page to read from.
 * @returns An effect succeeding with the badge's `innerText`.
 */
export const getStockLevel = (page: Page) =>
  Effect.tryPromise({
    try: () =>
      page.evaluate((selector) => {
        const badge = document.querySelector<HTMLDivElement>(selector)

        if (badge === null) {
          throw new Error('Could not find stock level')
        }

        return badge.innerText
      }, STOCK_LEVEL_BADGE_SELECTOR),
    catch: (cause) =>
      new CrawlerElementNotFoundError({
        message: `Could not parse stock level, ${cause}`,
        cause,
      }),
  })
|
||||
Reference in New Issue
Block a user