某bingwallpaper网站所有必应壁纸历史图片数据抓取

该网站的数据还算全,但如果是很久以前的图片,或者 Bing 官方没有提供 UHD 版本,图片只是被强行拉伸到 3840x2160,画面一团糊,并不是真正的超高清。运行该脚本时,请自行替换其中的 url = "https://bing.xxxxxx.com/bing/info/list"。该脚本会抓取 2009 年 8 月至 2025 年 4 月的所有数据,包含标题、作者、图片链接、日期、描述等结构化字段,所有数据保存到 年/月/data.json 下。

import os
import json
import time
import random
import requests
import base64
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.backends import default_backend

# RSA public key in PEM form; get_param() encrypts the request payload with it.
PUBLIC_KEY_PEM = '''-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvGkxb6fi4ZVCWQMaW7k1s8wqUltOPDE2R4N5XWOXDeDMfGDGKZSWTNs0QTPoXS96egM3GVJcY9Lmfxos/dDkeN3SD7811/vuIqNHX/fogIEC2o8mq2BVkxudfN0UkvT4+5HNd3e3kYQIFg5/Jmn+rMJ3oCvlJvjwNi+6+yo3rQYFPbVzWQz+RVocluRxpCKaOMOsihRhRKJWygL0h7vObClo1SnC6ij+CcbrEGwKyxNsxX5Ykb/BF7bHgN+hnn4fT2rO0uBQekbrRxxWZU5pZ+mnZwe4UrseReH86JY2ALHgJ+3CY57EgPAqDM9mqBdxZnHX/oIOXu2ZKYQEn68jbwIDAQAB
-----END PUBLIC KEY-----'''

def get_param(year, month):
    """Build the encrypted, base64-encoded token for one year/month request.

    The plaintext is a compact JSON object with two string fields:
    "param1" is the current time in milliseconds and "param2" is the
    month formatted as "YYYY-MM". It is encrypted with PUBLIC_KEY_PEM
    using PKCS#1 v1.5 padding.

    Args:
        year: Year of the month being requested.
        month: Month number; zero-padded to two digits in the payload.

    Returns:
        The ciphertext as base64 text.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        now_ms = int(time.time() * 1000)
        payload = {"param1": str(now_ms), "param2": f"{year}-{month:02d}"}
        # Compact separators keep the JSON text identical to what the JS side produces.
        plaintext = json.dumps(payload, separators=(',', ':'))
        plaintext_bytes = plaintext.encode()
        print("加密前内容:", plaintext, "长度:", len(plaintext_bytes))

        key = serialization.load_pem_public_key(
            PUBLIC_KEY_PEM.encode(),
            backend=default_backend(),
        )
        ciphertext = key.encrypt(plaintext_bytes, padding.PKCS1v15())
        return base64.b64encode(ciphertext).decode()
    except Exception as e:
        print(f"Error in get_param: {e}")
        raise

def wait_random(min_ms, max_ms):
    """Sleep for a random duration between min_ms and max_ms milliseconds.

    Args:
        min_ms: Lower bound of the pause, in milliseconds.
        max_ms: Upper bound of the pause, in milliseconds.

    Raises:
        KeyboardInterrupt: Re-raised after printing a cancellation notice.
        Exception: Any other failure is printed and then re-raised.
    """
    try:
        span_ms = max_ms - min_ms
        delay_ms = min_ms + random.random() * span_ms
        # time.sleep takes seconds, so convert from milliseconds.
        time.sleep(delay_ms / 1000)
    except KeyboardInterrupt:
        print("\nOperation cancelled by user")
        raise
    except Exception as e:
        print(f"Error in wait_random: {e}")
        raise

def fetch_with_retry(url, headers, max_retries=3):
    """GET a URL, retrying with a growing delay when the request fails.

    Args:
        url: Address to request.
        headers: HTTP headers sent with every attempt.
        max_retries: Total number of attempts; must be at least 1.

    Returns:
        The response of the first attempt that completes without a
        request error or an error HTTP status.

    Raises:
        ValueError: If max_retries is less than 1.
        requests.exceptions.RequestException: If the final attempt fails.
    """
    if max_retries < 1:
        # Without this guard the loop never runs and the function would
        # fall through and hand None back to the caller.
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException:
            if attempt == max_retries:
                # Out of attempts: let the caller see the last failure.
                raise
            # Linear backoff (2s, 4s, ...) plus up to 1s of random jitter.
            backoff = 2000 * attempt + random.random() * 1000
            print(f"Fetch failed (attempt {attempt}), retrying in {int(backoff)}ms...")
            time.sleep(backoff / 1000)

def _iter_months(start_year, start_month, end_year, end_month):
    """Yield (year, month) pairs from the start month through the end month, inclusive."""
    for year in range(start_year, end_year + 1):
        first = start_month if year == start_year else 1
        last = end_month if year == end_year else 12
        for month in range(first, last + 1):
            yield year, month

def fetch_data_for_date_range(start_year, start_month, end_year, end_month):
    """Download the data for every month in a range and save it to disk.

    Each month is requested separately with an Authorization token from
    get_param, and the decoded JSON is written to <year>/<month>/data.json
    relative to the current working directory. A month that fails is
    reported and skipped so the rest of the range is still fetched; the
    skipped months are listed once the range is finished.

    Args:
        start_year: Year of the first month to fetch.
        start_month: Month number of the first month to fetch.
        end_year: Year of the last month to fetch.
        end_month: Month number of the last month to fetch.

    Raises:
        KeyboardInterrupt: Re-raised after printing a cancellation notice.
        Exception: Failures outside the per-month handling are printed
            and then re-raised.
    """
    url = "https://bing.xxxxxx.com/bing/info/list"
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"

    total_months = (end_year - start_year) * 12 + (end_month - start_month + 1)
    failed_months = []

    try:
        months = _iter_months(start_year, start_month, end_year, end_month)
        for current_month, (year, month) in enumerate(months, start=1):
            label = f"{year}-{month:02d}"
            progress = (current_month / total_months) * 100
            print(f"\nProgress: {progress:.1f}% ({current_month}/{total_months})")
            print(f"Fetching data for {label}")

            try:
                headers = {
                    "Content-Type": "application/json",
                    # The token embeds the current time, so build it per request.
                    "Authorization": get_param(year, month),
                    "User-Agent": user_agent
                }
                response = fetch_with_retry(url, headers)
                data = response.json()

                # Save under <year>/<month>/data.json.
                month_dir = os.path.join(str(year), f"{month:02d}")
                os.makedirs(month_dir, exist_ok=True)
                file_path = os.path.join(month_dir, "data.json")
                with open(file_path, "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=2)

                print(f"Successfully saved data for {label}")
            except Exception as e:
                # Best effort: one bad month must not abort the whole run.
                print(f"Error fetching data for {label}: {e}")
                failed_months.append(label)

            # Pause 3-5 seconds between months. This runs after failures
            # too, so an error never causes an immediate follow-up request.
            wait_random(3000, 5000)

        if failed_months:
            print(f"\nFailed months ({len(failed_months)}): {', '.join(failed_months)}")

    except KeyboardInterrupt:
        print("\nOperation cancelled by user")
        raise
    except Exception as e:
        print(f"Error in fetch_data_for_date_range: {e}")
        raise

def main():
    """Fetch every month from 2009-08 through 2025-04 and report the outcome.

    A user interrupt or any other error is reported on stdout instead of
    being propagated.
    """
    first_year, first_month = 2009, 8
    last_year, last_month = 2025, 4

    print("Starting data fetch...")
    print(f"Date range: {first_year}-{first_month:02d} to {last_year}-{last_month:02d}")

    date_range = (first_year, first_month, last_year, last_month)
    try:
        fetch_data_for_date_range(*date_range)
        print("\nData fetch completed. All results saved by year/month/data.json")
    except KeyboardInterrupt:
        print("\nOperation cancelled by user")
    except Exception as e:
        print(f"\nError in main execution: {e}")

# Run the fetch only when executed as a script, not when imported.
if __name__ == "__main__":
    main() 
← 将Wordpress 博客迁移到了Cloudflare Workers
给宝宝一周岁的生日纪念册,已开源 →
二维码

微信扫码关注公众号