推薦者一覧の取り込み(Zキャリア)¶
概要
Zキャリアのデータをairtableに取り込む
手順
1️⃣ scriptを実行してcsvを2件取得
コード
import asyncio
import os
import pandas as pd
import re
from dotenv import load_dotenv
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
# 環境変数ロード
load_dotenv()
EMAIL = os.getenv("EMAIL")
PASSWORD = os.getenv("PASSWORD")
BASE_URL = "https://agent-bank.com"
ENTRY_URL_TEMPLATE = BASE_URL + "/service/entry?agent_id&limit=20&order=desc&page={page}&phase_group&query&sortBy=entryDate&talent_id"
TALENT_URL_TEMPLATE = BASE_URL + "/service/talent?agents&favorite&order=desc&page={page}&query&sort=talents.id&support_status&talents"
def write_partial_csv(all_data):
if not all_data:
print("⚠️ データが1件もないためCSVは出力しません。"); return
print("💾 途中までのデータを output.csv に出力します")
df = pd.DataFrame(all_data)
original_headers = ["選考", "応募検討", "書類確認", "書類選考", "一次面接", "二次面接", "最終面接", "内定", "入社"]
df.columns = original_headers[:df.shape[1]]
if "選考" not in df.columns:
df["選考"] = ""
selection_split = df["選考"].str.split("\n\n", expand=True)
selection_split = selection_split.reindex(columns=range(3), fill_value="")
selection_split.columns = ["応募者", "会社", "ポジション"]
def extract_status_and_date(cell):
if not isinstance(cell, str):
return pd.Series(["", pd.NaT])
date_match = re.search(r"\d{4}/\d{2}/\d{2}", cell)
date_str = date_match.group(0) if date_match else None
status_str = cell.replace(date_str, "").strip() if date_str else cell.strip()
return pd.Series([status_str, pd.to_datetime(date_str) if date_str else pd.NaT])
status_date_frames = []
for col in original_headers[1:]:
if col in df.columns:
tmp = df[col].apply(extract_status_and_date)
tmp.columns = [f"{col}ステータス", f"{col}日付"]
status_date_frames.append(tmp)
final_df = pd.concat([selection_split] + status_date_frames, axis=1)
final_df.to_csv("output.csv", index=False, encoding="utf-8-sig")
print("✅ output.csv を出力しました")
async def login(page):
await page.goto(f"{BASE_URL}/service/login")
await page.fill('input[name="email"]', EMAIL)
await page.fill('input[name="password"]', PASSWORD)
await page.click('button:has-text("ログイン")')
await page.wait_for_url("**/service/home", timeout=20000)
print("✅ ログイン成功")
async def fetch_applicated(page):
applicated_data = []
page_num = 1
while True:
url = TALENT_URL_TEMPLATE.format(page=page_num)
print(f"\n📄 Fetching Talent page {page_num}: {url}")
await page.goto(url)
try:
await page.wait_for_selector("table tbody tr", timeout=20000)
except PlaywrightTimeoutError:
print("⚠️ Talentページの読み込みがタイムアウトしました。処理を終了します。")
break
rows = await page.query_selector_all("table tbody tr")
if not rows:
print("⚠️ Talentデータがありません。処理を終了します。")
break
for row in rows:
cells = await row.query_selector_all("td")
if len(cells) < 3:
continue
name = await cells[1].inner_text()
person_in_charge = await cells[2].inner_text()
applicated_data.append({"名前": name, "担当者": person_in_charge})
page_num += 1
if applicated_data:
df_applicated = pd.DataFrame(applicated_data)
df_applicated.to_csv("applicated.csv", index=False, encoding="utf-8-sig")
print("✅ applicated.csv を出力しました")
async def fetch_selection_data(page):
all_data = []
page_num = 1
while True:
url = ENTRY_URL_TEMPLATE.format(page=page_num)
print(f"\n📄 Fetching Entry page {page_num}: {url}")
await page.goto(url)
try:
# テーブルがヘッダーだけでなくデータ行を持つまで待つ
await page.wait_for_function(
"""() => {
const lockRows = document.querySelectorAll('.lock-data table tr');
const scrollRows = document.querySelectorAll('.scroll-data table tr');
return lockRows.length > 1 && scrollRows.length > 1;
}""",
timeout=20000
)
except PlaywrightTimeoutError:
print("⚠️ Entryページの読み込みがタイムアウトしました。処理を終了します。")
break
# データ取得
lock_rows = await page.query_selector_all(".lock-data table tr")
scroll_rows = await page.query_selector_all(".scroll-data table tr")
if len(lock_rows) <= 1 or len(scroll_rows) <= 1:
print("⚠️ lock または scroll にデータがありません。処理を終了します。")
break
data_lock_rows, data_scroll_rows = lock_rows[1:], scroll_rows[1:]
for lock_row, scroll_row in zip(data_lock_rows, data_scroll_rows):
lock_values = [await c.inner_text() for c in await lock_row.query_selector_all("td")]
scroll_values = [await c.inner_text() for c in await scroll_row.query_selector_all("td")]
all_data.append(lock_values + scroll_values)
page_num += 1
write_partial_csv(all_data)
async def run():
async with async_playwright() as p:
browser = await p.chromium.launch(headless=False, slow_mo=100)
context = await browser.new_context()
page = await context.new_page()
await login(page)
await fetch_applicated(page)
await fetch_selection_data(page)
await browser.close()
if __name__ == "__main__":
asyncio.run(run())
info@meemで開く
3️⃣ inputシートに応募データimport(output.csv)
4️⃣ 担当シートのa-bに候補者データ(applicated.csv)貼り付け
新規が出たら、Zキャリアにログインし、
https://agent-bank.com/service/home
転職者から担当を確認し、フルネームを記入
5️⃣ airtable用のシートをcsvダウンロード
6️⃣ Airtable(推薦一覧)にAirtable用シートをimport
https://airtable.com/app4q6RHs9MxhnbFQ
Zキャリアの欄にデータインプット