コンテンツにスキップ

推薦者一覧の取り込み(Zキャリア)

概要

Zキャリアのデータをairtableに取り込む

手順

1️⃣ scriptを実行してcsvを2件取得

コード
import asyncio
import os
import pandas as pd
import re
from dotenv import load_dotenv
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError

# 環境変数ロード
load_dotenv()
EMAIL = os.getenv("EMAIL")
PASSWORD = os.getenv("PASSWORD")

BASE_URL = "https://agent-bank.com"
ENTRY_URL_TEMPLATE = BASE_URL + "/service/entry?agent_id&limit=20&order=desc&page={page}&phase_group&query&sortBy=entryDate&talent_id"
TALENT_URL_TEMPLATE = BASE_URL + "/service/talent?agents&favorite&order=desc&page={page}&query&sort=talents.id&support_status&talents"

def write_partial_csv(all_data):
    if not all_data:
        print("⚠️ データが1件もないためCSVは出力しません。"); return

    print("💾 途中までのデータを output.csv に出力します")
    df = pd.DataFrame(all_data)
    original_headers = ["選考", "応募検討", "書類確認", "書類選考", "一次面接", "二次面接", "最終面接", "内定", "入社"]
    df.columns = original_headers[:df.shape[1]]

    if "選考" not in df.columns:
        df["選考"] = ""

    selection_split = df["選考"].str.split("\n\n", expand=True)
    selection_split = selection_split.reindex(columns=range(3), fill_value="")
    selection_split.columns = ["応募者", "会社", "ポジション"]

    def extract_status_and_date(cell):
        if not isinstance(cell, str):
            return pd.Series(["", pd.NaT])
        date_match = re.search(r"\d{4}/\d{2}/\d{2}", cell)
        date_str = date_match.group(0) if date_match else None
        status_str = cell.replace(date_str, "").strip() if date_str else cell.strip()
        return pd.Series([status_str, pd.to_datetime(date_str) if date_str else pd.NaT])

    status_date_frames = []
    for col in original_headers[1:]:
        if col in df.columns:
            tmp = df[col].apply(extract_status_and_date)
            tmp.columns = [f"{col}ステータス", f"{col}日付"]
            status_date_frames.append(tmp)

    final_df = pd.concat([selection_split] + status_date_frames, axis=1)
    final_df.to_csv("output.csv", index=False, encoding="utf-8-sig")
    print("✅ output.csv を出力しました")

async def login(page):
    await page.goto(f"{BASE_URL}/service/login")
    await page.fill('input[name="email"]', EMAIL)
    await page.fill('input[name="password"]', PASSWORD)
    await page.click('button:has-text("ログイン")')

    await page.wait_for_url("**/service/home", timeout=20000)
    print("✅ ログイン成功")

async def fetch_applicated(page):
    applicated_data = []
    page_num = 1

    while True:
        url = TALENT_URL_TEMPLATE.format(page=page_num)
        print(f"\n📄 Fetching Talent page {page_num}: {url}")
        await page.goto(url)

        try:
            await page.wait_for_selector("table tbody tr", timeout=20000)
        except PlaywrightTimeoutError:
            print("⚠️ Talentページの読み込みがタイムアウトしました。処理を終了します。")
            break

        rows = await page.query_selector_all("table tbody tr")
        if not rows:
            print("⚠️ Talentデータがありません。処理を終了します。")
            break

        for row in rows:
            cells = await row.query_selector_all("td")
            if len(cells) < 3:
                continue
            name = await cells[1].inner_text()
            person_in_charge = await cells[2].inner_text()
            applicated_data.append({"名前": name, "担当者": person_in_charge})

        page_num += 1

    if applicated_data:
        df_applicated = pd.DataFrame(applicated_data)
        df_applicated.to_csv("applicated.csv", index=False, encoding="utf-8-sig")
        print("✅ applicated.csv を出力しました")

async def fetch_selection_data(page):
    all_data = []
    page_num = 1

    while True:
        url = ENTRY_URL_TEMPLATE.format(page=page_num)
        print(f"\n📄 Fetching Entry page {page_num}: {url}")
        await page.goto(url)

        try:
            # テーブルがヘッダーだけでなくデータ行を持つまで待つ
            await page.wait_for_function(
                """() => {
                    const lockRows = document.querySelectorAll('.lock-data table tr');
                    const scrollRows = document.querySelectorAll('.scroll-data table tr');
                    return lockRows.length > 1 && scrollRows.length > 1;
                }""",
                timeout=20000
            )
        except PlaywrightTimeoutError:
            print("⚠️ Entryページの読み込みがタイムアウトしました。処理を終了します。")
            break

        # データ取得
        lock_rows = await page.query_selector_all(".lock-data table tr")
        scroll_rows = await page.query_selector_all(".scroll-data table tr")

        if len(lock_rows) <= 1 or len(scroll_rows) <= 1:
            print("⚠️ lock または scroll にデータがありません。処理を終了します。")
            break

        data_lock_rows, data_scroll_rows = lock_rows[1:], scroll_rows[1:]

        for lock_row, scroll_row in zip(data_lock_rows, data_scroll_rows):
            lock_values = [await c.inner_text() for c in await lock_row.query_selector_all("td")]
            scroll_values = [await c.inner_text() for c in await scroll_row.query_selector_all("td")]
            all_data.append(lock_values + scroll_values)

        page_num += 1

    write_partial_csv(all_data)

async def run():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False, slow_mo=100)
        context = await browser.new_context()
        page = await context.new_page()

        await login(page)
        await fetch_applicated(page)
        await fetch_selection_data(page)

        await browser.close()

if __name__ == "__main__":
    asyncio.run(run())

2️⃣ https://docs.google.com/spreadsheets/d/129-AEpHYiG4jM5FoJ_mWmliVNhWzo4-oFWIRt3WEr00/edit?gid=1802503069#gid=1802503069

info@meemで開く

3️⃣ inputシートに応募データimport(output.csv)

4️⃣ 担当シートのa-bに候補者データ(applicated.csv)貼り付け

新規が出たら、Zキャリアにログインし、

https://agent-bank.com/service/home

転職者から担当を確認し、フルネームを記入

5️⃣ airtable用のシートをcsvダウンロード

6️⃣ Airtable(推薦一覧)にAirtable用シートをimport

https://airtable.com/app4q6RHs9MxhnbFQ

Zキャリアの欄にデータインプット