Skip to content

关于puppeteer定时爬虫的dockerfile写法记录 #20

@arcsin1

Description

@arcsin1
 {
  "name": "cron-app",
  "version": "1.0.0",
  "description": "A Node.js app with cron jobs managed by node-cron and pm2.",
  "main": "cron-script.js",
  "scripts": {
    "start": "pm2-runtime start cron-script.js"
  },
  "dependencies": {
    "axios": "^1.7.2",
    "cheerio": "^1.0.0-rc.12",
    "node-cron": "^3.0.0",
    "pm2": "^5.3.1",
    "puppeteer": "^22.10.0"
  }
}
 FROM node:18.14.2-bullseye-slim

# Image for a scheduled Puppeteer crawler (node-cron) supervised by pm2-runtime.
# Layers are ordered from least to most frequently changing:
#   mirrors -> OS packages -> global tooling -> project deps -> smoke test -> app code.

# Use the distro-packaged Chromium instead of a browser fetched by Puppeteer.
# These must be set BEFORE `npm install`, otherwise Puppeteer's install step
# can still download a browser that is never used.
# PUPPETEER_SKIP_CHROMIUM_DOWNLOAD is kept alongside PUPPETEER_SKIP_DOWNLOAD
# for compatibility with older Puppeteer releases.
ENV PUPPETEER_SKIP_DOWNLOAD=true \
    PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium

# Working directory for the application.
WORKDIR /usr/src/app

# Alternative: Tencent Cloud mirrors (enable instead of the Aliyun block below).
# RUN sed -i 's#http://deb.debian.org#http://mirrors.cloud.tencent.com#g;s#http://security.debian.org#http://mirrors.cloud.tencent.com#g' /etc/apt/sources.list \
#   && npm c set registry="https://registry.npmmirror.com" \
#   && echo "puppeteer_download_host=https://registry.npmmirror.com/-/binary" >> ~/.npmrc

# Aliyun apt mirror + npmmirror registry. Configured before any apt-get or
# npm command so that every later download actually goes through the mirrors.
# NOTE(review): the puppeteer_download_host entry only matters if the browser
# download is re-enabled; confirm the installed Puppeteer version honors it.
RUN sed -i 's#http://deb.debian.org#http://mirrors.aliyun.com#g;s#http://security.debian.org#http://mirrors.aliyun.com#g' /etc/apt/sources.list \
  && npm config set registry="https://registry.npmmirror.com" \
  && echo "puppeteer_download_host=https://registry.npmmirror.com/-/binary" >> ~/.npmrc

# Chromium plus the fonts and shared libraries used for headless rendering.
# update + install + list cleanup stay in one layer to avoid stale apt indexes
# and to keep the index files out of the image.
# NOTE(review): confirm every package name below resolves on bullseye
# (e.g. libappindicator3-1) when bumping the base image.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
  ca-certificates \
  chromium \
  fonts-freefont-ttf \
  fonts-ipafont-gothic \
  fonts-kacst \
  fonts-liberation \
  fonts-thai-tlwg \
  fonts-wqy-zenhei \
  libappindicator3-1 \
  libasound2 \
  libatk-bridge2.0-0 \
  libatk1.0-0 \
  libc6 \
  libcairo2 \
  libcups2 \
  libdbus-1-3 \
  libexpat1 \
  libfontconfig1 \
  libgbm1 \
  libgcc1 \
  libglib2.0-0 \
  libgtk-3-0 \
  libnspr4 \
  libnss3 \
  libpango-1.0-0 \
  libpangocairo-1.0-0 \
  libstdc++6 \
  libx11-6 \
  libx11-xcb1 \
  libxcb1 \
  libxcomposite1 \
  libxcursor1 \
  libxdamage1 \
  libxext6 \
  libxfixes3 \
  libxi6 \
  libxrandr2 \
  libxrender1 \
  libxss1 \
  libxtst6 \
  lsb-release \
  wget \
  xdg-utils \
  && rm -rf /var/lib/apt/lists/*

# Install pm2 globally so that `pm2-runtime` is on PATH for CMD.
# The range matches the pm2 version declared in package.json.
RUN npm install -g "pm2@^5.3.1" \
  && npm cache clean --force

# Copy only the manifest first so the dependency layer is reused until
# package.json changes.
COPY package.json ./

# Install project dependencies (through the mirror configured above).
RUN npm install \
  && npm cache clean --force

# Smoke test: fail the build early if Puppeteer cannot launch the system
# Chromium. The temporary script is removed in the same layer.
RUN echo "const puppeteer = require('puppeteer'); \
(async () => { \
  const browser = await puppeteer.launch({ \
    headless: true, \
    executablePath: '/usr/bin/chromium', \
    args: ['--no-sandbox', '--disable-setuid-sandbox'] \
  }); \
  console.log('Chromium started successfully'); \
  await browser.close(); \
})();" > test.js && node test.js && rm -f test.js

# Copy the application code last: source edits no longer invalidate the
# OS-package, dependency, or smoke-test layers.
COPY . .

# Start the app under pm2-runtime (exec form, so signals reach the process).
CMD ["pm2-runtime", "start", "cron-script.js"]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions