<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>catalystmind 님의 블로그</title>
    <link>https://catalystmind.tistory.com/</link>
    <description>AI 실험실, 2차전지 기술 동향, 퀀트 투자, 개인 생각 정리</description>
    <language>ko</language>
    <pubDate>Thu, 11 Jun 2026 16:29:38 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>catalystmind</managingEditor>
    <item>
      <title>Playwright와 Trafilatura를 활용한 JavaScript 기반 웹사이트 추출</title>
      <link>https://catalystmind.tistory.com/22</link>
      <description>&lt;div id=&quot;code_1749825813596&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;블로그 TL;DR 템플릿&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            margin: 20px;
            background-color: #f8fafc;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px;&amp;quot;&amp;gt;
        &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
        &amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
            &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;기존 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Trafilatura + Requests&amp;lt;/span&amp;gt; 조합은 JavaScript 기반 사이트에서 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;본문 추출 한계&amp;lt;/span&amp;gt; 발생&amp;lt;/li&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Playwright 도입&amp;lt;/span&amp;gt; 후 requests에서 실패한 biz.chosun.com 등은 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;100% 추출 성공&amp;lt;/span&amp;gt;&amp;lt;/li&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;www.msn.com 등 일부 사이트는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;여전히 추출 실패&amp;lt;/span&amp;gt;, 특화된 로직 필요&amp;lt;/li&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;처리 속도는 requests 대비 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;평균 10배 감소&amp;lt;/span&amp;gt;하여 성능 최적화 필요&amp;lt;/li&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Playwright 병렬화&amp;lt;/span&amp;gt;를 통한 처리 시간 단축이 핵심 개선 과제&amp;lt;/li&amp;gt;
            &amp;lt;/ul&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div style=&amp;quot;margin-top: 20px; padding: 16px; background-color: #fef3c7; border-radius: 6px; border-left: 4px solid #f59e0b;&amp;quot;&amp;gt;
            &amp;lt;h3 style=&amp;quot;color: #92400e; margin-top: 0; margin-bottom: 12px; font-size: 16px;&amp;quot;&amp;gt;향후 개선 방향&amp;lt;/h3&amp;gt;
            &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0; color: #92400e;&amp;quot;&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 8px;&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;개선 방향 ①&amp;lt;/strong&amp;gt; 처리 시간 단축을 위한 Playwright &amp;lt;strong&amp;gt;병렬화 필요&amp;lt;/strong&amp;gt;&amp;lt;/li&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 8px;&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;개선 방향 ②&amp;lt;/strong&amp;gt; msn.com 등 반복 실패 사이트에 &amp;lt;strong&amp;gt;특화된 추출 로직 필요&amp;lt;/strong&amp;gt;&amp;lt;/li&amp;gt;
            &amp;lt;/ul&amp;gt;
        &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            margin: 20px;
            background-color: #f8fafc;
        }
    &lt;/style&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;기존 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Trafilatura + Requests&lt;/span&gt; 조합은 JavaScript 기반 사이트에서 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;본문 추출 한계&lt;/span&gt; 발생&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Playwright 도입&lt;/span&gt; 후 requests에서 실패한 biz.chosun.com 등은 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;100% 추출 성공&lt;/span&gt;&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;www.msn.com 등 일부 사이트는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;여전히 추출 실패&lt;/span&gt;, 특화된 로직 필요&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;처리 속도는 requests 대비 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;평균 10배 감소&lt;/span&gt;하여 성능 최적화 필요&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Playwright 병렬화&lt;/span&gt;를 통한 처리 시간 단축이 핵심 개선 과제&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div style=&quot;margin-top: 20px; padding: 16px; background-color: #fef3c7; border-radius: 6px; border-left: 4px solid #f59e0b;&quot;&gt;
&lt;h3 style=&quot;color: #92400e; margin-top: 0; margin-bottom: 12px; font-size: 16px;&quot; data-ke-size=&quot;size23&quot;&gt;향후 개선 방향&lt;/h3&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; color: #92400e; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 8px;&quot;&gt;&lt;b&gt;개선 방향 ①&lt;/b&gt; 처리 시간 단축을 위한 Playwright &lt;b&gt;병렬화 필요&lt;/b&gt;&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 8px;&quot;&gt;&lt;b&gt;개선 방향 ②&lt;/b&gt; msn.com 등 반복 실패 사이트에 &lt;b&gt;특화된 추출 로직 필요&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;trafilatura + requests를 이용하여 약 80%의 기사 본문 추출에 성공하였다. 실패한 20%를 분석한 결과, 특정 도메인에서는 본문 추출이 100% 실패하였다. &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt; 하지만, 우리가 실제 브라우저에서 접속하면 모든 것이 정상으로 보인다.&lt;/b&gt;&lt;/span&gt; 이는 왜 그럴까? 이는 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;JavaScript로 만들어진 웹사이트의 특징으로 JavaScript가 실행된 후에 실제 내용이 나타나는 방식&lt;/span&gt;&lt;/b&gt;이기 때문이다. 따라서, 이런 웹사이트에서 본문을 추출하려면 실제 브라우저를 열고 웹사이트로 이동하도록 자동화해야 한다.&lt;/p&gt;
&lt;div id=&quot;code_1749304465241&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
  &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
  &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
  &amp;lt;title&amp;gt;requests + trafilatura 조합 실패 사례 분석&amp;lt;/title&amp;gt;
  &amp;lt;style&amp;gt;
    #failure-report {
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      line-height: 1.6;
      color: #333;
      background: #f5f7fa;
      padding: 20px;
      border-radius: 12px;
      box-shadow: 0 4px 20px rgba(0,0,0,0.08);
    }

    #failure-report .fr-container {
      max-width: 1000px;
      margin: 0 auto;
      background: white;
      border-radius: 12px;
      box-shadow: 0 4px 20px rgba(0,0,0,0.08);
      overflow: hidden;
    }

    #failure-report .fr-content {
      padding: 25px;
    }

    #failure-report .fr-section {
      margin-bottom: 25px;
    }

    #failure-report .fr-section h2 {
      font-size: 1.2rem;
      margin-bottom: 15px;
      color: #2c3e50;
      border-bottom: 2px solid #2c3e50;
      padding-bottom: 5px;
      font-weight: 600;
    }

    #failure-report .fr-insights-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
      gap: 20px;
    }

    #failure-report .fr-insight-card {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 18px;
      border-left: 3px solid #2c3e50;
    }

    #failure-report .fr-insight-title {
      font-size: 1rem;
      font-weight: 600;
      color: #2c3e50;
      margin-bottom: 12px;
      display: flex;
      align-items: center;
    }

    #failure-report .fr-insight-title .icon {
      margin-right: 8px;
      font-size: 1.2rem;
    }

    #failure-report .fr-domain-list {
      list-style: none;
      margin: 0;
      padding: 0;
    }

    #failure-report .fr-domain-item {
      display: flex;
      justify-content: space-between;
      align-items: center;
      padding: 6px 0;
      border-bottom: 1px solid #e9ecef;
    }

    #failure-report .fr-domain-item:last-child {
      border-bottom: none;
    }

    #failure-report .fr-domain-name {
      font-weight: 500;
      color: #495057;
      font-size: 0.9rem;
    }

    #failure-report .fr-failure-rate {
      background: #2c3e50;
      color: white;
      padding: 3px 8px;
      border-radius: 3px;
      font-size: 0.75rem;
      font-weight: 600;
    }

    #failure-report .fr-error-breakdown {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 18px;
      border-left: 3px solid #2c3e50;
    }

    #failure-report .fr-error-item {
      display: flex;
      justify-content: space-between;
      align-items: center;
      padding: 8px 0;
      border-bottom: 1px solid #e9ecef;
    }

    #failure-report .fr-error-item:last-child {
      border-bottom: none;
    }

    #failure-report .fr-error-desc {
      flex: 1;
    }

    #failure-report .fr-error-title {
      font-weight: 600;
      color: #2c3e50;
      margin-bottom: 2px;
      font-size: 0.9rem;
    }

    #failure-report .fr-error-detail {
      font-size: 0.8rem;
      color: #666;
    }

    #failure-report .fr-error-count {
      background: #2c3e50;
      color: white;
      padding: 4px 10px;
      border-radius: 15px;
      font-size: 0.75rem;
      font-weight: 600;
      min-width: 40px;
      text-align: center;
    }

    @media (max-width: 768px) {
      #failure-report .fr-insights-grid {
        grid-template-columns: 1fr;
        gap: 15px;
      }
    }
  &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
  &amp;lt;div id=&amp;quot;failure-report&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;fr-container&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;fr-content&amp;quot;&amp;gt;
        &amp;lt;!-- requests + trafilatura 조합 실패 사례 분석 --&amp;gt;
        &amp;lt;div class=&amp;quot;fr-section&amp;quot;&amp;gt;
          &amp;lt;h2&amp;gt;requests + trafilatura 조합 실패 사례 분석&amp;lt;/h2&amp;gt;
          &amp;lt;div class=&amp;quot;fr-insights-grid&amp;quot;&amp;gt;
            &amp;lt;div class=&amp;quot;fr-insight-card&amp;quot;&amp;gt;
              &amp;lt;div class=&amp;quot;fr-insight-title&amp;quot;&amp;gt;
                &amp;lt;span class=&amp;quot;icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                문제 도메인 집중도
              &amp;lt;/div&amp;gt;
              &amp;lt;ul class=&amp;quot;fr-domain-list&amp;quot;&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;biz.chosun.com&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;100% (62건)&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;www.msn.com&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;100% (23건)&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;기타 도메인&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;약 10% (9건)&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
              &amp;lt;/ul&amp;gt;
            &amp;lt;/div&amp;gt;

            &amp;lt;div class=&amp;quot;fr-insight-card&amp;quot;&amp;gt;
              &amp;lt;div class=&amp;quot;fr-insight-title&amp;quot;&amp;gt;
                &amp;lt;span class=&amp;quot;icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                프로토콜 분석
              &amp;lt;/div&amp;gt;
              &amp;lt;ul class=&amp;quot;fr-domain-list&amp;quot;&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;HTTPS 사이트&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;85건 (90.4%)&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;HTTP 사이트&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;9건 (9.6%)&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
                &amp;lt;li class=&amp;quot;fr-domain-item&amp;quot;&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-domain-name&amp;quot;&amp;gt;평균 처리시간&amp;lt;/span&amp;gt;
                  &amp;lt;span class=&amp;quot;fr-failure-rate&amp;quot;&amp;gt;1.45초&amp;lt;/span&amp;gt;
                &amp;lt;/li&amp;gt;
              &amp;lt;/ul&amp;gt;
            &amp;lt;/div&amp;gt;
          &amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;

        &amp;lt;!-- 오류 유형 분석 --&amp;gt;
        &amp;lt;div class=&amp;quot;fr-section&amp;quot;&amp;gt;
          &amp;lt;h2&amp;gt;오류 유형별 분석&amp;lt;/h2&amp;gt;
          &amp;lt;div class=&amp;quot;fr-error-breakdown&amp;quot;&amp;gt;
            &amp;lt;div class=&amp;quot;fr-error-item&amp;quot;&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-desc&amp;quot;&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-title&amp;quot;&amp;gt;콘텐츠 추출 실패&amp;lt;/div&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-detail&amp;quot;&amp;gt;페이지 구조 변경, JS 렌더링, 또는 동적 로딩으로 인한 실패&amp;lt;/div&amp;gt;
              &amp;lt;/div&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-count&amp;quot;&amp;gt;92건&amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
            &amp;lt;div class=&amp;quot;fr-error-item&amp;quot;&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-desc&amp;quot;&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-title&amp;quot;&amp;gt;연결 타임아웃&amp;lt;/div&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-detail&amp;quot;&amp;gt;서버 응답 지연 (antnews.org)&amp;lt;/div&amp;gt;
              &amp;lt;/div&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-count&amp;quot;&amp;gt;1건&amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
            &amp;lt;div class=&amp;quot;fr-error-item&amp;quot;&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-desc&amp;quot;&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-title&amp;quot;&amp;gt;서비스 이용불가&amp;lt;/div&amp;gt;
                &amp;lt;div class=&amp;quot;fr-error-detail&amp;quot;&amp;gt;503 에러 - 서버 일시적 장애&amp;lt;/div&amp;gt;
              &amp;lt;/div&amp;gt;
              &amp;lt;div class=&amp;quot;fr-error-count&amp;quot;&amp;gt;1건&amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
          &amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;

      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;
&lt;style&gt;
    #failure-report {
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      line-height: 1.6;
      color: #333;
      background: #f5f7fa;
      padding: 20px;
      border-radius: 12px;
      box-shadow: 0 4px 20px rgba(0,0,0,0.08);
    }

    #failure-report .fr-container {
      max-width: 1000px;
      margin: 0 auto;
      background: white;
      border-radius: 12px;
      box-shadow: 0 4px 20px rgba(0,0,0,0.08);
      overflow: hidden;
    }

    #failure-report .fr-content {
      padding: 25px;
    }

    #failure-report .fr-section {
      margin-bottom: 25px;
    }

    #failure-report .fr-section h2 {
      font-size: 1.2rem;
      margin-bottom: 15px;
      color: #2c3e50;
      border-bottom: 2px solid #2c3e50;
      padding-bottom: 5px;
      font-weight: 600;
    }

    #failure-report .fr-insights-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
      gap: 20px;
    }

    #failure-report .fr-insight-card {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 18px;
      border-left: 3px solid #2c3e50;
    }

    #failure-report .fr-insight-title {
      font-size: 1rem;
      font-weight: 600;
      color: #2c3e50;
      margin-bottom: 12px;
      display: flex;
      align-items: center;
    }

    #failure-report .fr-insight-title .icon {
      margin-right: 8px;
      font-size: 1.2rem;
    }

    #failure-report .fr-domain-list {
      list-style: none;
      margin: 0;
      padding: 0;
    }

    #failure-report .fr-domain-item {
      display: flex;
      justify-content: space-between;
      align-items: center;
      padding: 6px 0;
      border-bottom: 1px solid #e9ecef;
    }

    #failure-report .fr-domain-item:last-child {
      border-bottom: none;
    }

    #failure-report .fr-domain-name {
      font-weight: 500;
      color: #495057;
      font-size: 0.9rem;
    }

    #failure-report .fr-failure-rate {
      background: #2c3e50;
      color: white;
      padding: 3px 8px;
      border-radius: 3px;
      font-size: 0.75rem;
      font-weight: 600;
    }

    #failure-report .fr-error-breakdown {
      background: #f8f9fa;
      border-radius: 6px;
      padding: 18px;
      border-left: 3px solid #2c3e50;
    }

    #failure-report .fr-error-item {
      display: flex;
      justify-content: space-between;
      align-items: center;
      padding: 8px 0;
      border-bottom: 1px solid #e9ecef;
    }

    #failure-report .fr-error-item:last-child {
      border-bottom: none;
    }

    #failure-report .fr-error-desc {
      flex: 1;
    }

    #failure-report .fr-error-title {
      font-weight: 600;
      color: #2c3e50;
      margin-bottom: 2px;
      font-size: 0.9rem;
    }

    #failure-report .fr-error-detail {
      font-size: 0.8rem;
      color: #666;
    }

    #failure-report .fr-error-count {
      background: #2c3e50;
      color: white;
      padding: 4px 10px;
      border-radius: 15px;
      font-size: 0.75rem;
      font-weight: 600;
      min-width: 40px;
      text-align: center;
    }

    @media (max-width: 768px) {
      #failure-report .fr-insights-grid {
        grid-template-columns: 1fr;
        gap: 15px;
      }
    }
  &lt;/style&gt;
&lt;div id=&quot;failure-report&quot;&gt;
&lt;div class=&quot;fr-container&quot;&gt;
&lt;div class=&quot;fr-content&quot;&gt;&lt;!-- requests + trafilatura 조합 실패 사례 분석 --&gt;
&lt;div class=&quot;fr-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;requests + trafilatura 조합 실패 사례 분석&lt;/h2&gt;
&lt;div class=&quot;fr-insights-grid&quot;&gt;
&lt;div class=&quot;fr-insight-card&quot;&gt;
&lt;div class=&quot;fr-insight-title&quot;&gt;&lt;span class=&quot;icon&quot;&gt; &lt;/span&gt; 문제 도메인 집중도&lt;/div&gt;
&lt;ul class=&quot;fr-domain-list&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;biz.chosun.com&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;100% (62건)&lt;/span&gt;&lt;/li&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;www.msn.com&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;100% (23건)&lt;/span&gt;&lt;/li&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;기타 도메인&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;약 10% (9건)&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-insight-card&quot;&gt;
&lt;div class=&quot;fr-insight-title&quot;&gt;&lt;span class=&quot;icon&quot;&gt; &lt;/span&gt; 프로토콜 분석&lt;/div&gt;
&lt;ul class=&quot;fr-domain-list&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;HTTPS 사이트&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;85건 (90.4%)&lt;/span&gt;&lt;/li&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;HTTP 사이트&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;9건 (9.6%)&lt;/span&gt;&lt;/li&gt;
&lt;li class=&quot;fr-domain-item&quot;&gt;&lt;span class=&quot;fr-domain-name&quot;&gt;평균 처리시간&lt;/span&gt; &lt;span class=&quot;fr-failure-rate&quot;&gt;1.45초&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 오류 유형 분석 --&gt;
&lt;div class=&quot;fr-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;오류 유형별 분석&lt;/h2&gt;
&lt;div class=&quot;fr-error-breakdown&quot;&gt;
&lt;div class=&quot;fr-error-item&quot;&gt;
&lt;div class=&quot;fr-error-desc&quot;&gt;
&lt;div class=&quot;fr-error-title&quot;&gt;콘텐츠 추출 실패&lt;/div&gt;
&lt;div class=&quot;fr-error-detail&quot;&gt;페이지 구조 변경, JS 렌더링, 또는 동적 로딩으로 인한 실패&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-error-count&quot;&gt;92건&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-error-item&quot;&gt;
&lt;div class=&quot;fr-error-desc&quot;&gt;
&lt;div class=&quot;fr-error-title&quot;&gt;연결 타임아웃&lt;/div&gt;
&lt;div class=&quot;fr-error-detail&quot;&gt;서버 응답 지연 (antnews.org)&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-error-count&quot;&gt;1건&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-error-item&quot;&gt;
&lt;div class=&quot;fr-error-desc&quot;&gt;
&lt;div class=&quot;fr-error-title&quot;&gt;서비스 이용불가&lt;/div&gt;
&lt;div class=&quot;fr-error-detail&quot;&gt;503 에러 - 서버 일시적 장애&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;fr-error-count&quot;&gt;1건&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;  Playwright란?&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Playwright는 마이크로소프트에서 개발한 웹 브라우저 자동화 도구로, &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;사람이 직접 웹브라우저에서 하는 모든 행동&lt;/span&gt;&lt;/b&gt;을 코드로 자동화할 수 있게 해주는 프로그램이다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;  무엇을 할 수 있나?&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;실제 브라우저를 열어서 웹페이지에 접속하고, 버튼을 클릭하고, 텍스트를 입력하고, 스크롤을 내리는 등 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;사용자가 할 수 있는 모든 행동을 자동으로 수행&lt;/b&gt;&lt;/span&gt;할 수 있다. 간단한 작업은 Power Automate로도 가능하지만, 조금만 복잡해져도 금방 한계가 드러난다.&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;⭐ 왜 유용한가?&lt;/b&gt;&lt;/h4&gt;
&lt;ol style=&quot;list-style-type: decimal;&quot; data-ke-list-type=&quot;decimal&quot;&gt;
&lt;li&gt;&lt;b&gt;⚙️ JavaScript 처리&lt;/b&gt;: 요즘 웹사이트는 대부분 JavaScript로 동적으로 콘텐츠를 로딩한다. 이런 웹사이트는 초기 HTML은 빈 껍데기만 제공하고, JavaScript가 실행된 후에야 실제 내용이 나타나는 방식이므로, requests로는 빈 껍데기만 가져올 뿐이다. 반면, Playwright는 이런 복잡한 웹사이트도 완벽하게 처리할 수 있다.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;  실제 브라우저 환경&lt;/b&gt;:  Chrome,  Firefox,  Safari 등 실제 브라우저를 사용하므로 웹사이트가 정확히 어떻게 보이고 동작하는지 파악할 수 있다. 특히, 문제가 발생하면 어디서 문제가 발생하는지 정확하게 확인할 수 있다.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;  안정성&lt;/b&gt;: 페이지가 완전히 로딩될 때까지 자동으로 기다리고, 오류가 발생하면 재시도하는 등 안정적으로 작동한다.&lt;/li&gt;
&lt;/ol&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;  주요 활용 분야&lt;/b&gt;&lt;/h4&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;  웹 스크래핑&lt;/b&gt;: 복잡한 웹사이트에서 데이터를 수집할 때&lt;/li&gt;
&lt;li&gt;&lt;b&gt;  자동화 테스트&lt;/b&gt;: 웹사이트가 제대로 작동하는지 자동으로 검사&lt;/li&gt;
&lt;li&gt;&lt;b&gt;  반복 작업 자동화&lt;/b&gt;: 매일 해야 하는 웹 작업을 자동화&lt;/li&gt;
&lt;li&gt;&lt;b&gt;  모니터링&lt;/b&gt;: 웹사이트 상태를 주기적으로 확인&lt;/li&gt;
&lt;/ul&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;b&gt;⚠️ 단점&lt;/b&gt;&lt;/h4&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;속도가 상대적으로 느리고(실제 브라우저를 실행하기 때문), 설치 파일이 크며(수백 MB), 리소스를 많이 사용한다. 따라서, Playwright는 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&quot;진짜 브라우저에서 복잡한 작업을 자동화해야 할 때&quot;&lt;/span&gt;&lt;/b&gt; 사용하는 강력한 도구라고 보면 된다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  Playwright vs Requests 비교표&lt;/h2&gt;
&lt;div id=&quot;code_1749470857503&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;Playwright vs Requests 비교&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background-color: #f8fafc;
            padding: 40px 20px;
            line-height: 1.6;
        }

        .container {
            max-width: 900px;
            margin: 0 auto;
            background: white;
            border-radius: 12px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
            overflow: hidden;
        }

        .header {
            background: #1e293b;
            color: white;
            padding: 32px;
            text-align: center;
        }

        .header h1 {
            font-size: 28px;
            font-weight: 600;
            margin-bottom: 8px;
        }

        .header p {
            font-size: 16px;
            opacity: 0.8;
        }

        .table {
            width: 100%;
            border-collapse: collapse;
        }

        .table th {
            background: #f1f5f9;
            padding: 12px;
            text-align: left;
            font-weight: 600;
            color: #334155;
            border-bottom: 1px solid #e2e8f0;
        }

        .table th:first-child {
            width: 30%;
        }

        .table th:nth-child(2) {
            width: 35%;
            color: #1e40af;
        }

        .table th:nth-child(3) {
            width: 35%;
            color: #dc2626;
        }

        .table td {
            padding: 10px 12px;
            border-bottom: 1px solid #f1f5f9;
            vertical-align: top;
        }

        .table tr:hover {
            background: #f8fafc;
        }

        .category {
            font-weight: 500;
            color: #475569;
        }

        .playwright {
            color: #1e40af;
        }

        .requests {
            color: #dc2626;
        }

        .support-yes {
            color: #059669;
            font-weight: 500;
        }

        .support-no {
            color: #dc2626;
            font-weight: 500;
        }

        .support-partial {
            color: #d97706;
            font-weight: 500;
        }

        .status {
            display: inline-block;
            width: 8px;
            height: 8px;
            border-radius: 50%;
            margin-right: 8px;
        }

        .status-good {
            background: #10b981;
        }

        .status-bad {
            background: #ef4444;
        }

        .status-warning {
            background: #f59e0b;
        }

        @media (max-width: 768px) {
            .container {
                margin: 0;
                border-radius: 0;
            }
            
            .header {
                padding: 24px;
            }
            
            .header h1 {
                font-size: 24px;
            }
            
            .table th,
            .table td {
                padding: 12px;
                font-size: 14px;
            }
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div class=&amp;quot;container&amp;quot;&amp;gt;

        
        &amp;lt;table class=&amp;quot;table&amp;quot;&amp;gt;
            &amp;lt;thead&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;th&amp;gt;항목&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;  Playwright&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;  Requests&amp;lt;/th&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/thead&amp;gt;
            &amp;lt;tbody&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;category&amp;quot;&amp;gt;주요 목적&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;playwright&amp;quot;&amp;gt;브라우저 자동화 및 렌더링된 콘텐츠 제어&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;requests&amp;quot;&amp;gt;HTTP 요청으로 정적 데이터 수집&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;category&amp;quot;&amp;gt;⚙️ JavaScript 처리&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;playwright&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-good&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-yes&amp;quot;&amp;gt;완전 지원&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;requests&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-bad&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-no&amp;quot;&amp;gt;지원하지 않음&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;category&amp;quot;&amp;gt; ️ UI 상호작용&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;playwright&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-good&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-yes&amp;quot;&amp;gt;클릭, 입력, 스크롤&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;requests&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-bad&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-no&amp;quot;&amp;gt;불가능&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;category&amp;quot;&amp;gt;⚡ 처리 속도&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;playwright&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-warning&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-partial&amp;quot;&amp;gt;느림&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;requests&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-good&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-yes&amp;quot;&amp;gt;매우 빠름&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;category&amp;quot;&amp;gt;  설치 크기&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;playwright&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-warning&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-partial&amp;quot;&amp;gt;큼 (수백 MB)&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;requests&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;status status-good&amp;quot;&amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;support-yes&amp;quot;&amp;gt;가벼움&amp;lt;/span&amp;gt;
                    &amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/tbody&amp;gt;
        &amp;lt;/table&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background-color: #f8fafc;
            padding: 40px 20px;
            line-height: 1.6;
        }

        .container {
            max-width: 900px;
            margin: 0 auto;
            background: white;
            border-radius: 12px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
            overflow: hidden;
        }

        .header {
            background: #1e293b;
            color: white;
            padding: 32px;
            text-align: center;
        }

        .header h1 {
            font-size: 28px;
            font-weight: 600;
            margin-bottom: 8px;
        }

        .header p {
            font-size: 16px;
            opacity: 0.8;
        }

        .table {
            width: 100%;
            border-collapse: collapse;
        }

        .table th {
            background: #f1f5f9;
            padding: 12px;
            text-align: left;
            font-weight: 600;
            color: #334155;
            border-bottom: 1px solid #e2e8f0;
        }

        .table th:first-child {
            width: 30%;
        }

        .table th:nth-child(2) {
            width: 35%;
            color: #1e40af;
        }

        .table th:nth-child(3) {
            width: 35%;
            color: #dc2626;
        }

        .table td {
            padding: 10px 12px;
            border-bottom: 1px solid #f1f5f9;
            vertical-align: top;
        }

        .table tr:hover {
            background: #f8fafc;
        }

        .category {
            font-weight: 500;
            color: #475569;
        }

        .playwright {
            color: #1e40af;
        }

        .requests {
            color: #dc2626;
        }

        .support-yes {
            color: #059669;
            font-weight: 500;
        }

        .support-no {
            color: #dc2626;
            font-weight: 500;
        }

        .support-partial {
            color: #d97706;
            font-weight: 500;
        }

        .status {
            display: inline-block;
            width: 8px;
            height: 8px;
            border-radius: 50%;
            margin-right: 8px;
        }

        .status-good {
            background: #10b981;
        }

        .status-bad {
            background: #ef4444;
        }

        .status-warning {
            background: #f59e0b;
        }

        @media (max-width: 768px) {
            .container {
                margin: 0;
                border-radius: 0;
            }
            
            .header {
                padding: 24px;
            }
            
            .header h1 {
                font-size: 24px;
            }
            
            .table th,
            .table td {
                padding: 12px;
                font-size: 14px;
            }
        }
    &lt;/style&gt;
&lt;div class=&quot;container&quot;&gt;
&lt;table class=&quot;table&quot;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;항목&lt;/th&gt;
&lt;th&gt;  Playwright&lt;/th&gt;
&lt;th&gt;  Requests&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;category&quot;&gt;주요 목적&lt;/td&gt;
&lt;td class=&quot;playwright&quot;&gt;브라우저 자동화 및 렌더링된 콘텐츠 제어&lt;/td&gt;
&lt;td class=&quot;requests&quot;&gt;HTTP 요청으로 정적 데이터 수집&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;category&quot;&gt;⚙️ JavaScript 처리&lt;/td&gt;
&lt;td class=&quot;playwright&quot;&gt;&lt;span class=&quot;status status-good&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-yes&quot;&gt;완전 지원&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;requests&quot;&gt;&lt;span class=&quot;status status-bad&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-no&quot;&gt;지원하지 않음&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;category&quot;&gt; ️ UI 상호작용&lt;/td&gt;
&lt;td class=&quot;playwright&quot;&gt;&lt;span class=&quot;status status-good&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-yes&quot;&gt;클릭, 입력, 스크롤&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;requests&quot;&gt;&lt;span class=&quot;status status-bad&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-no&quot;&gt;불가능&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;category&quot;&gt;⚡ 처리 속도&lt;/td&gt;
&lt;td class=&quot;playwright&quot;&gt;&lt;span class=&quot;status status-warning&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-partial&quot;&gt;느림&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;requests&quot;&gt;&lt;span class=&quot;status status-good&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-yes&quot;&gt;매우 빠름&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;category&quot;&gt;  설치 크기&lt;/td&gt;
&lt;td class=&quot;playwright&quot;&gt;&lt;span class=&quot;status status-warning&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-partial&quot;&gt;큼 (수백 MB)&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;requests&quot;&gt;&lt;span class=&quot;status status-good&quot;&gt;&lt;/span&gt; &lt;span class=&quot;support-yes&quot;&gt;가벼움&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;playwright와 trafilatura를 결합한 기본 사용법&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;가장 많은 실패건수를 기록하고 있는 &lt;span&gt;biz.chosun.com&lt;/span&gt; 웹사이트를 테스트 해보기로 했다. requests에 해당하는 부분을 playwright로 바꿔서 html을 추출하는 코드로 변경했다. 테스트 결과, 성공적으로 본문이 추출됨을 확인했다.&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1749351684382&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import asyncio
from playwright.sync_api import sync_playwright
import trafilatura
from trafilatura.metadata import extract_metadata

# 대상 URL 설정
url = &quot;https://biz.chosun.com/stock/c-biz_bot/2025/05/20/ANYFOCJLKBURJBPW4ZXRNUSKLE&quot;

# HTML 다운로드 (Playwright 사용)
def fetch_with_playwright(target_url):
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        context = await browser.new_context(
            user_agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36&quot;
        )
        page = browser.new_page()
        page.goto(target_url, wait_until='domcontentloaded', timeout=30000)
        page.wait_for_timeout(3000)
        content = page.content()
        browser.close()
        return content

# HTML 가져오기
downloaded = fetch_with_playwright(url)

# 메타데이터 및 본문 텍스트 추출
metadata = extract_metadata(downloaded)
text = trafilatura.extract(downloaded, output_format='txt', include_comments=False, favor_precision=True)

# 결과 출력
print(f&quot;  제목: {metadata.title}&quot;)
print(f&quot;  날짜: {metadata.date}&quot;)
print(f&quot;  본문:\n{text}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Kwkd8/btsOtviUpHd/vhqS0ZKMN5yRglzx0uJMCK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Kwkd8/btsOtviUpHd/vhqS0ZKMN5yRglzx0uJMCK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Kwkd8/btsOtviUpHd/vhqS0ZKMN5yRglzx0uJMCK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKwkd8%2FbtsOtviUpHd%2FvhqS0ZKMN5yRglzx0uJMCK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;⚠️ 주의 사항:&amp;nbsp; &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Jupyter Notebook이 이미 asyncio 이벤트 루프를 실행 중&lt;/b&gt;&lt;/span&gt;이기 때문에, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Jupyter에서는 비동기 버전&lt;/b&gt;&lt;/span&gt;을 사용해야 한다. (Jupyter Notebook 수정된 코드는 아래를 참고)&lt;/p&gt;
&lt;pre id=&quot;code_1749352446312&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import asyncio
from playwright.async_api import async_playwright
import trafilatura
from trafilatura.metadata import extract_metadata

# 대상 URL
url = &quot;https://biz.chosun.com/stock/c-biz_bot/2025/05/20/ANYFOCJLKBURJBPW4ZXRNUSKLE&quot;

# Playwright로 HTML 가져오기 (비동기)
async def fetch_with_playwright(target_url):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        context = await browser.new_context(
            user_agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36&quot;
        )
        page = await browser.new_page()        
        await page.goto(target_url, wait_until='domcontentloaded', timeout=30000)
        await page.wait_for_timeout(3000)
        content = await page.content()
        await browser.close()
        return content

# Jupyter에서는 asyncio.run() 대신 nest_asyncio 사용 필요
import nest_asyncio
nest_asyncio.apply()

# 실행 및 결과 추출
downloaded = asyncio.get_event_loop().run_until_complete(fetch_with_playwright(url))
metadata = extract_metadata(downloaded)
text = trafilatura.extract(downloaded, output_format='txt', include_comments=False, favor_precision=True)

# 결과 출력
print(f&quot;  제목: {metadata.title}&quot;)
print(f&quot;  날짜: {metadata.date}&quot;)
print(f&quot;  본문:\n{text}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;2890&quot; data-end=&quot;2913&quot;&gt;Trafilatura - Playwright 조합의 성능 평가&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;성능 평가를 위해, Trafilatura - Requests 조합에서 실패한 94개의 웹사이트를 그대로 사용했다. 성능 평가를 위한 최종 코드는 아래와 같이 작성하였다.&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1749817996769&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;티스토리 코드 블록&amp;lt;/title&amp;gt;
    &amp;lt;!-- ★ 스타일 --&amp;gt;
    &amp;lt;style&amp;gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ reduced top/bottom margin */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06); /* the semicolon is required: without it the width declaration below is parsed as part of this value and both are dropped */
      /* ── NEW: lift the width restriction ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&amp;lt;/style&amp;gt;


&amp;lt;/head&amp;gt;

&amp;lt;!-- ★ 코드 박스 시작 --&amp;gt;
&amp;lt;div class=&amp;quot;code-box&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-hd&amp;quot;&amp;gt;
      playwright Batch Processor (w/ Trafilatura)
    &amp;lt;span class=&amp;quot;toggle-btn&amp;quot;&amp;gt;  펼치기&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-ct&amp;quot;&amp;gt;
&amp;lt;pre&amp;gt;&amp;lt;code class=&amp;quot;language-python&amp;quot;&amp;gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
from playwright.sync_api import sync_playwright
import time
from datetime import datetime
import csv
from typing import List, Optional


def fetch_html_with_playwright(url: str, timeout: int = 30000) -&amp;gt; dict:
    &amp;quot;&amp;quot;&amp;quot;
    Playwright로 HTML 다운로드하는 함수
    &amp;quot;&amp;quot;&amp;quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'playwright'
    }

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                user_agent=&amp;quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36&amp;quot;
            )
            page = browser.new_page()
            page.goto(url, wait_until='domcontentloaded', timeout=timeout)
            # 페이지 로딩 대기 (필요시)
            page.wait_for_timeout(3000)
            content = page.content()
            browser.close()
            
            result['html'] = content
            result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result


def process_single_url(url, index, total_urls):
    &amp;quot;&amp;quot;&amp;quot;단일 URL 처리 함수&amp;quot;&amp;quot;&amp;quot;
    start_time = time.time()
    result = {
        &amp;quot;index&amp;quot;: index,
        &amp;quot;url&amp;quot;: url,
        &amp;quot;status&amp;quot;: &amp;quot;failed&amp;quot;,
        &amp;quot;title&amp;quot;: None,
        &amp;quot;date&amp;quot;: None,
        &amp;quot;content&amp;quot;: None,
        &amp;quot;processing_time&amp;quot;: 0,
        &amp;quot;error_message&amp;quot;: None,
    }
    
    try:
        print(f&amp;quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&amp;quot;)
        
        # HTML 다운로드 (Playwright 사용)
        html_result = fetch_html_with_playwright(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&amp;quot;txt&amp;quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &amp;quot;status&amp;quot;: &amp;quot;success&amp;quot;,
                    &amp;quot;title&amp;quot;: metadata.title if metadata and metadata.title else &amp;quot;No title&amp;quot;,
                    &amp;quot;date&amp;quot;: str(metadata.date) if metadata and metadata.date else &amp;quot;No date&amp;quot;,
                    &amp;quot;content&amp;quot;: text,
                })
            else:
                result[&amp;quot;error_message&amp;quot;] = &amp;quot;Empty content extracted&amp;quot;
        else:
            result[&amp;quot;error_message&amp;quot;] = f&amp;quot;Failed to download page: {html_result['error']}&amp;quot;
            
    except Exception as e:
        result[&amp;quot;error_message&amp;quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&amp;quot;processing_time&amp;quot;] = processing_time
    
    return result


def save_results_to_csv(results, input_file_path=None, output_file=None):
    &amp;quot;&amp;quot;&amp;quot;결과를 CSV 파일로 저장&amp;quot;&amp;quot;&amp;quot;
    import os

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&amp;quot;%Y%m%d_%H%M%S&amp;quot;)
        filename = f&amp;quot;url_processing_results_playwright_{timestamp}.csv&amp;quot;
        
        # 입력 파일과 같은 디렉토리에 저장
        if input_file_path and os.path.exists(input_file_path):
            input_dir = os.path.dirname(input_file_path)
            output_file = os.path.join(input_dir, filename)
        else:
            # 입력 파일 경로가 없으면 현재 디렉토리에 저장
            output_file = filename

    try:
        with open(output_file, &amp;quot;w&amp;quot;, newline=&amp;quot;&amp;quot;, encoding=&amp;quot;utf-8&amp;quot;) as csvfile:
            fieldnames = [
                &amp;quot;index&amp;quot;,
                &amp;quot;url&amp;quot;,
                &amp;quot;status&amp;quot;,
                &amp;quot;title&amp;quot;,
                &amp;quot;date&amp;quot;,
                &amp;quot;content&amp;quot;,
                &amp;quot;processing_time&amp;quot;,
                &amp;quot;error_message&amp;quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&amp;quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&amp;quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&amp;quot;❌ 결과 저장 실패: {e}&amp;quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &amp;quot;&amp;quot;&amp;quot;처리 결과 요약 출력&amp;quot;&amp;quot;&amp;quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&amp;quot;processing_time&amp;quot;] for r in results]

    print(&amp;quot;\n&amp;quot; + &amp;quot;=&amp;quot; * 7 + &amp;quot; SUMMARY &amp;quot; + &amp;quot;=&amp;quot; * 7)
    print(f&amp;quot;Total URLs processed: {total_urls}&amp;quot;)
    print(&amp;quot;Workers used: 1&amp;quot;)  # 단일 스레드 처리
    print(f&amp;quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&amp;quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&amp;quot;\n&amp;quot; + &amp;quot;-&amp;quot; * 5 + &amp;quot; TIMING INFORMATION &amp;quot; + &amp;quot;-&amp;quot; * 5)
        print(f&amp;quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&amp;quot;)
        print(f&amp;quot;Average processing time per URL: {avg_time:.2f} seconds&amp;quot;)
        print(f&amp;quot;Fastest URL processing time: {min(processing_times):.2f} seconds&amp;quot;)
        print(f&amp;quot;Slowest URL processing time: {max(processing_times):.2f} seconds&amp;quot;)

    print(&amp;quot;\nProcess completed successfully. Results saved to CSV file.&amp;quot;)


def process_urls_from_csv(csv_file_path, url_column=&amp;quot;url&amp;quot;, status_column=&amp;quot;status&amp;quot;):
    &amp;quot;&amp;quot;&amp;quot;CSV 파일에서 status가 'failed'인 URL들을 읽어서 순차 처리&amp;quot;&amp;quot;&amp;quot;

    print(&amp;quot;=&amp;quot; * 50)
    print(&amp;quot;  URL 배치 처리 시작 (Playwright - Failed URLs만)&amp;quot;)
    print(&amp;quot;=&amp;quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&amp;quot;  CSV 파일 컬럼들: {list(df.columns)}&amp;quot;)
        print(f&amp;quot;  총 행 수: {len(df)}&amp;quot;)

        # 필요한 컬럼 존재 확인
        if url_column not in df.columns:
            raise ValueError(f&amp;quot;Column '{url_column}' not found in CSV file&amp;quot;)
        if status_column not in df.columns:
            raise ValueError(f&amp;quot;Column '{status_column}' not found in CSV file&amp;quot;)

        # status가 'failed'인 행들만 필터링
        failed_df = df[df[status_column] == 'failed']
        print(f&amp;quot;  전체 행 수: {len(df)}&amp;quot;)
        print(f&amp;quot;  status가 'failed'인 행 수: {len(failed_df)}&amp;quot;)

        if len(failed_df) == 0:
            print(&amp;quot;⚠️ status가 'failed'인 URL이 없습니다.&amp;quot;)
            return None, None

        # failed URL들 추출 (NULL 값 제외)
        all_urls = failed_df[url_column].dropna().tolist()
        print(f&amp;quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {len(all_urls)}&amp;quot;)
        
        if all_urls:
            print(f&amp;quot;  첫 번째 URL 샘플: {all_urls[0]}&amp;quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &amp;quot;news.google.com&amp;quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&amp;quot;  failed 상태의 전체 URL: {len(all_urls)}개&amp;quot;)
        print(f&amp;quot;  처리 대상 URL: {total_urls}개&amp;quot;)
        print(f&amp;quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&amp;quot;)

        if urls:
            print(f&amp;quot;  첫 번째 처리 대상 URL 샘플: {urls[0]}&amp;quot;)

        print(&amp;quot;-&amp;quot; * 30)

    except Exception as e:
        print(f&amp;quot;❌ CSV 파일 읽기 실패: {e}&amp;quot;)
        return None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&amp;quot;진행: {i + 1}/{total_urls} &amp;quot;
                f&amp;quot;({(i + 1)/total_urls*100:.1f}%) &amp;quot;
                f&amp;quot;성공: {successful_count}, 실패: {failed_count}&amp;quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results, input_file_path=csv_file_path)
    return results, saved_file


def main():
    import sys
    import os
    
    # 명령줄 인자로 파일명을 받은 경우
    if len(sys.argv) &amp;gt; 1:
        csv_file_path = sys.argv[1]
    else:
        # 대화형으로 파일명 입력받기
        print(&amp;quot;=&amp;quot; * 50)
        print(&amp;quot;  CSV 파일 입력&amp;quot;)
        print(&amp;quot;=&amp;quot; * 50)
        
        while True:
            csv_file_path = input(&amp;quot;처리할 CSV 파일 경로를 입력하세요: &amp;quot;).strip()
            
            # 따옴표 제거 (드래그앤드롭으로 붙여넣을 때 따옴표가 붙는 경우)
            csv_file_path = csv_file_path.strip('&amp;quot;').strip(&amp;quot;'&amp;quot;)
            
            # 파일 존재 여부 확인
            if os.path.exists(csv_file_path):
                break
            else:
                print(f&amp;quot;❌ 파일을 찾을 수 없습니다: {csv_file_path}&amp;quot;)
                print(&amp;quot;다시 입력해주세요.\n&amp;quot;)
    
    # 파일 존재 여부 최종 확인
    if not os.path.exists(csv_file_path):
        print(f&amp;quot;❌ 파일을 찾을 수 없습니다: {csv_file_path}&amp;quot;)
        return
    
    print(f&amp;quot;  처리할 파일: {csv_file_path}&amp;quot;)
    
    # 컬럼명 설정 (필요시 여기서 변경 가능)
    url_column = &amp;quot;url&amp;quot;
    status_column = &amp;quot;status&amp;quot;
    
    # 컬럼명을 사용자가 지정하고 싶은 경우
    while True:
        change_columns = input(f&amp;quot;\nURL 컬럼명 (현재: '{url_column}')과 Status 컬럼명 (현재: '{status_column}')을 변경하시겠습니까? (y/n): &amp;quot;).strip().lower()
        
        if change_columns in ['y', 'yes']:
            url_column = input(f&amp;quot;URL 컬럼명을 입력하세요 (기본값: {url_column}): &amp;quot;).strip() or url_column
            status_column = input(f&amp;quot;Status 컬럼명을 입력하세요 (기본값: {status_column}): &amp;quot;).strip() or status_column
            break
        elif change_columns in ['n', 'no']:
            break
        else:
            print(&amp;quot;y 또는 n을 입력해주세요.&amp;quot;)
    
    print(f&amp;quot;  사용할 컬럼: URL='{url_column}', Status='{status_column}'&amp;quot;)
    
    # URL 처리 시작
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=url_column, status_column=status_column)

    if results:
        print(f&amp;quot;\n  저장된 파일: {saved_file}&amp;quot;)

        print(&amp;quot;\n  처리 결과 샘플:&amp;quot;)
        for i, result in enumerate(results[:3]):
            print(f&amp;quot;\n[{i+1}] {result['url'][:50]}...&amp;quot;)
            print(f&amp;quot;    상태: {result['status']}&amp;quot;)
            print(f&amp;quot;    제목: {result['title']}&amp;quot;)
            print(f&amp;quot;    처리시간: {result['processing_time']:.2f}초&amp;quot;)
            if result[&amp;quot;status&amp;quot;] == &amp;quot;failed&amp;quot;:
                print(f&amp;quot;    오류: {result['error_message']}&amp;quot;)
    else:
        print(&amp;quot;\n❌ 처리할 URL이 없거나 처리에 실패했습니다.&amp;quot;)


if __name__ == &amp;quot;__main__&amp;quot;:
    main()
&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-act&amp;quot;&amp;gt;
    &amp;lt;button class=&amp;quot;copy-btn&amp;quot;&amp;gt;  코드 복사&amp;lt;/button&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;!-- ★ 코드 박스 끝 --&amp;gt;

&amp;lt;!-- ★ 토글 &amp;amp; 복사 스크립트 --&amp;gt;
&amp;lt;script&amp;gt;
document.addEventListener(&amp;quot;DOMContentLoaded&amp;quot;, () =&amp;gt; {

  /* 토글 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;toggle-btn&amp;quot;)) return;
    const box = e.target.closest(&amp;quot;.code-box&amp;quot;);
    const cont = box.querySelector(&amp;quot;.code-ct&amp;quot;);
    const act = box.querySelector(&amp;quot;.code-act&amp;quot;);
    const open = cont.style.display === &amp;quot;block&amp;quot;;
    cont.style.display = act.style.display = open ? &amp;quot;none&amp;quot; : &amp;quot;block&amp;quot;;
    e.target.textContent = open ? &amp;quot;  펼치기&amp;quot; : &amp;quot;  접기&amp;quot;;
  });

  /* 복사 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;copy-btn&amp;quot;)) return;
    const btn = e.target;
    const code = btn.closest(&amp;quot;.code-box&amp;quot;).querySelector(&amp;quot;code&amp;quot;).innerText;
    if (navigator.clipboard &amp;amp;&amp;amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&amp;gt; flash(btn))
                                         .catch(() =&amp;gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&amp;gt; {
    const orig = btn.textContent;
    btn.textContent = &amp;quot;✅ 복사됨!&amp;quot;;
    setTimeout(() =&amp;gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&amp;gt; {
    const ta = document.createElement(&amp;quot;textarea&amp;quot;);
    ta.value = text;
    ta.style.position = &amp;quot;fixed&amp;quot;;
    ta.style.top = &amp;quot;-1000px&amp;quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&amp;quot;copy&amp;quot;);
      flash(btn);
    } catch {
      alert(&amp;quot;복사 실패   &amp;ndash; 브라우저가 클립보드를 차단했습니다.&amp;quot;);
    }
    document.body.removeChild(ta);
  };
});
&amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;&lt;!-- ★ 스타일 --&gt;
&lt;style&gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ reduced top/bottom margin */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06); /* the semicolon is required: without it the width declaration below is parsed as part of this value and both are dropped */
      /* ── NEW: lift the width restriction ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&lt;/style&gt;
&lt;!-- ★ 코드 박스 시작 --&gt;
&lt;div class=&quot;code-box&quot;&gt;
&lt;div class=&quot;code-hd&quot;&gt;  playwright Batch Processor (w/ Trafilatura) &lt;span class=&quot;toggle-btn&quot;&gt;  펼치기&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-ct&quot;&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
from playwright.sync_api import sync_playwright
import time
from datetime import datetime
import csv
from typing import List, Optional


def fetch_html_with_playwright(url: str, timeout: int = 30000) -&amp;gt; dict:
    &quot;&quot;&quot;
    Playwright로 HTML 다운로드하는 함수
    &quot;&quot;&quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'playwright'
    }

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                user_agent=&quot;Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36&quot;
            )
            page = browser.new_page()
            page.goto(url, wait_until='domcontentloaded', timeout=timeout)
            # 페이지 로딩 대기 (필요시)
            page.wait_for_timeout(3000)
            content = page.content()
            browser.close()
            
            result['html'] = content
            result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result


def process_single_url(url, index, total_urls):
    &quot;&quot;&quot;단일 URL 처리 함수&quot;&quot;&quot;
    start_time = time.time()
    result = {
        &quot;index&quot;: index,
        &quot;url&quot;: url,
        &quot;status&quot;: &quot;failed&quot;,
        &quot;title&quot;: None,
        &quot;date&quot;: None,
        &quot;content&quot;: None,
        &quot;processing_time&quot;: 0,
        &quot;error_message&quot;: None,
    }
    
    try:
        print(f&quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&quot;)
        
        # HTML 다운로드 (Playwright 사용)
        html_result = fetch_html_with_playwright(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&quot;txt&quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &quot;status&quot;: &quot;success&quot;,
                    &quot;title&quot;: metadata.title if metadata and metadata.title else &quot;No title&quot;,
                    &quot;date&quot;: str(metadata.date) if metadata and metadata.date else &quot;No date&quot;,
                    &quot;content&quot;: text,
                })
            else:
                result[&quot;error_message&quot;] = &quot;Empty content extracted&quot;
        else:
            result[&quot;error_message&quot;] = f&quot;Failed to download page: {html_result['error']}&quot;
            
    except Exception as e:
        result[&quot;error_message&quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&quot;processing_time&quot;] = processing_time
    
    return result


def save_results_to_csv(results, input_file_path=None, output_file=None):
    &quot;&quot;&quot;결과를 CSV 파일로 저장&quot;&quot;&quot;
    import os

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&quot;%Y%m%d_%H%M%S&quot;)
        filename = f&quot;url_processing_results_playwright_{timestamp}.csv&quot;
        
        # 입력 파일과 같은 디렉토리에 저장
        if input_file_path and os.path.exists(input_file_path):
            input_dir = os.path.dirname(input_file_path)
            output_file = os.path.join(input_dir, filename)
        else:
            # 입력 파일 경로가 없으면 현재 디렉토리에 저장
            output_file = filename

    try:
        with open(output_file, &quot;w&quot;, newline=&quot;&quot;, encoding=&quot;utf-8&quot;) as csvfile:
            fieldnames = [
                &quot;index&quot;,
                &quot;url&quot;,
                &quot;status&quot;,
                &quot;title&quot;,
                &quot;date&quot;,
                &quot;content&quot;,
                &quot;processing_time&quot;,
                &quot;error_message&quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&quot;❌ 결과 저장 실패: {e}&quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &quot;&quot;&quot;처리 결과 요약 출력&quot;&quot;&quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&quot;status&quot;] == &quot;success&quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&quot;processing_time&quot;] for r in results]

    print(&quot;\n&quot; + &quot;=&quot; * 7 + &quot; SUMMARY &quot; + &quot;=&quot; * 7)
    print(f&quot;Total URLs processed: {total_urls}&quot;)
    print(&quot;Workers used: 1&quot;)  # 단일 스레드 처리
    print(f&quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&quot;\n&quot; + &quot;-&quot; * 5 + &quot; TIMING INFORMATION &quot; + &quot;-&quot; * 5)
        print(f&quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&quot;)
        print(f&quot;Average processing time per URL: {avg_time:.2f} seconds&quot;)
        print(f&quot;Fastest URL processing time: {min(processing_times):.2f} seconds&quot;)
        print(f&quot;Slowest URL processing time: {max(processing_times):.2f} seconds&quot;)

    print(&quot;\nProcess completed successfully. Results saved to CSV file.&quot;)


def process_urls_from_csv(csv_file_path, url_column=&quot;url&quot;, status_column=&quot;status&quot;):
    &quot;&quot;&quot;CSV 파일에서 status가 'failed'인 URL들을 읽어서 순차 처리&quot;&quot;&quot;

    print(&quot;=&quot; * 50)
    print(&quot;  URL 배치 처리 시작 (Playwright - Failed URLs만)&quot;)
    print(&quot;=&quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&quot;  CSV 파일 컬럼들: {list(df.columns)}&quot;)
        print(f&quot;  총 행 수: {len(df)}&quot;)

        # 필요한 컬럼 존재 확인
        if url_column not in df.columns:
            raise ValueError(f&quot;Column '{url_column}' not found in CSV file&quot;)
        if status_column not in df.columns:
            raise ValueError(f&quot;Column '{status_column}' not found in CSV file&quot;)

        # status가 'failed'인 행들만 필터링
        failed_df = df[df[status_column] == 'failed']
        print(f&quot;  전체 행 수: {len(df)}&quot;)
        print(f&quot;  status가 'failed'인 행 수: {len(failed_df)}&quot;)

        if len(failed_df) == 0:
            print(&quot;⚠️ status가 'failed'인 URL이 없습니다.&quot;)
            return None, None

        # failed URL들 추출 (NULL 값 제외)
        all_urls = failed_df[url_column].dropna().tolist()
        print(f&quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {len(all_urls)}&quot;)
        
        if all_urls:
            print(f&quot;  첫 번째 URL 샘플: {all_urls[0]}&quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &quot;news.google.com&quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&quot;  failed 상태의 전체 URL: {len(all_urls)}개&quot;)
        print(f&quot;  처리 대상 URL: {total_urls}개&quot;)
        print(f&quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&quot;)

        if urls:
            print(f&quot;  첫 번째 처리 대상 URL 샘플: {urls[0]}&quot;)

        print(&quot;-&quot; * 30)

    except Exception as e:
        print(f&quot;❌ CSV 파일 읽기 실패: {e}&quot;)
        return None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&quot;status&quot;] == &quot;success&quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&quot;진행: {i + 1}/{total_urls} &quot;
                f&quot;({(i + 1)/total_urls*100:.1f}%) &quot;
                f&quot;성공: {successful_count}, 실패: {failed_count}&quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results, input_file_path=csv_file_path)
    return results, saved_file


def main():
    import sys
    import os
    
    # 명령줄 인자로 파일명을 받은 경우
    if len(sys.argv) &amp;gt; 1:
        csv_file_path = sys.argv[1]
    else:
        # 대화형으로 파일명 입력받기
        print(&quot;=&quot; * 50)
        print(&quot;  CSV 파일 입력&quot;)
        print(&quot;=&quot; * 50)
        
        while True:
            csv_file_path = input(&quot;처리할 CSV 파일 경로를 입력하세요: &quot;).strip()
            
            # 따옴표 제거 (드래그앤드롭으로 붙여넣을 때 따옴표가 붙는 경우)
            csv_file_path = csv_file_path.strip('&quot;').strip(&quot;'&quot;)
            
            # 파일 존재 여부 확인
            if os.path.exists(csv_file_path):
                break
            else:
                print(f&quot;❌ 파일을 찾을 수 없습니다: {csv_file_path}&quot;)
                print(&quot;다시 입력해주세요.\n&quot;)
    
    # 파일 존재 여부 최종 확인
    if not os.path.exists(csv_file_path):
        print(f&quot;❌ 파일을 찾을 수 없습니다: {csv_file_path}&quot;)
        return
    
    print(f&quot;  처리할 파일: {csv_file_path}&quot;)
    
    # 컬럼명 설정 (필요시 여기서 변경 가능)
    url_column = &quot;url&quot;
    status_column = &quot;status&quot;
    
    # 컬럼명을 사용자가 지정하고 싶은 경우
    while True:
        change_columns = input(f&quot;\nURL 컬럼명 (현재: '{url_column}')과 Status 컬럼명 (현재: '{status_column}')을 변경하시겠습니까? (y/n): &quot;).strip().lower()
        
        if change_columns in ['y', 'yes']:
            url_column = input(f&quot;URL 컬럼명을 입력하세요 (기본값: {url_column}): &quot;).strip() or url_column
            status_column = input(f&quot;Status 컬럼명을 입력하세요 (기본값: {status_column}): &quot;).strip() or status_column
            break
        elif change_columns in ['n', 'no']:
            break
        else:
            print(&quot;y 또는 n을 입력해주세요.&quot;)
    
    print(f&quot;  사용할 컬럼: URL='{url_column}', Status='{status_column}'&quot;)
    
    # URL 처리 시작
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=url_column, status_column=status_column)

    if results:
        print(f&quot;\n  저장된 파일: {saved_file}&quot;)

        print(&quot;\n  처리 결과 샘플:&quot;)
        for i, result in enumerate(results[:3]):
            print(f&quot;\n[{i+1}] {result['url'][:50]}...&quot;)
            print(f&quot;    상태: {result['status']}&quot;)
            print(f&quot;    제목: {result['title']}&quot;)
            print(f&quot;    처리시간: {result['processing_time']:.2f}초&quot;)
            if result[&quot;status&quot;] == &quot;failed&quot;:
                print(f&quot;    오류: {result['error_message']}&quot;)
    else:
        print(&quot;\n❌ 처리할 URL이 없거나 처리에 실패했습니다.&quot;)


if __name__ == &quot;__main__&quot;:
    main()
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div class=&quot;code-act&quot;&gt;&lt;button class=&quot;copy-btn&quot;&gt;  코드 복사&lt;/button&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;!-- ★ 코드 박스 끝 --&gt; &lt;!-- ★ 토글 &amp; 복사 스크립트 --&gt;
&lt;script&gt;
document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {

  /* 토글 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;toggle-btn&quot;)) return;
    const box = e.target.closest(&quot;.code-box&quot;);
    const cont = box.querySelector(&quot;.code-ct&quot;);
    const act = box.querySelector(&quot;.code-act&quot;);
    const open = cont.style.display === &quot;block&quot;;
    cont.style.display = act.style.display = open ? &quot;none&quot; : &quot;block&quot;;
    e.target.textContent = open ? &quot;  펼치기&quot; : &quot;  접기&quot;;
  });

  /* 복사 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;copy-btn&quot;)) return;
    const btn = e.target;
    const code = btn.closest(&quot;.code-box&quot;).querySelector(&quot;code&quot;).innerText;
    if (navigator.clipboard &amp;&amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&gt; flash(btn))
                                         .catch(() =&gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&gt; {
    const orig = btn.textContent;
    btn.textContent = &quot;✅ 복사됨!&quot;;
    setTimeout(() =&gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&gt; {
    const ta = document.createElement(&quot;textarea&quot;);
    ta.value = text;
    ta.style.position = &quot;fixed&quot;;
    ta.style.top = &quot;-1000px&quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&quot;copy&quot;);
      flash(btn);
    } catch {
      alert(&quot;복사 실패   – 브라우저가 클립보드를 차단했습니다.&quot;);
    }
    document.body.removeChild(ta);
  };
});
&lt;/script&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/InnZm/btsOAXnieci/9679UK6IbQK3eDJlIFUOy0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/InnZm/btsOAXnieci/9679UK6IbQK3eDJlIFUOy0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/InnZm/btsOAXnieci/9679UK6IbQK3eDJlIFUOy0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FInnZm%2FbtsOAXnieci%2F9679UK6IbQK3eDJlIFUOy0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  성능 평가 및 결과&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Requests는 0.61초에 하나의 웹사이트를 추출할 수 있지만, Playwright를 이용한 브라우저 자동화는 평균 5.93초로 처리 시간이 약 10배로 증가하였다. 또한, MSN 웹사이트는 여전히 추출이 100% 실패했다.&lt;/p&gt;
&lt;div id=&quot;code_1749822631063&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;URL 처리 결과 분석&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Arial', sans-serif;
            background: white;
            min-height: 100vh;
            padding: 20px;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
        }

        .content {
            padding: 40px;
        }

        .summary-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 40px;
        }

        .summary-card {
            background: #f8f9ff;
            padding: 20px;
            border-radius: 8px;
            text-align: center;
            border-left: 4px solid #667eea;
        }

        .summary-card h3 {
            color: #667eea;
            font-size: 1.3em;
            margin-bottom: 8px;
            font-weight: bold;
        }

        .summary-card p {
            color: #333;
            font-size: 1em;
            margin: 0;
            font-weight: 500;
        }

        .section {
            margin-bottom: 40px;
        }

        .section h2 {
            color: #2196f3;
            font-size: 1.8em;
            margin-bottom: 20px;
            padding-bottom: 10px;
            border-bottom: 3px solid #2196f3;
        }

        .domain-grid {
            display: grid;
            gap: 15px;
        }

        .domain-item {
            background: #f0f8ff;
            padding: 20px;
            border-radius: 10px;
            display: flex;
            justify-content: space-between;
            align-items: center;
            border-left: 5px solid transparent;
        }

        .domain-item.success {
            border-left-color: #2196f3;
        }

        .domain-item.failed {
            border-left-color: #f44336;
            background: #fff5f5;
        }

        .domain-item.partial {
            border-left-color: #f44336;
            background: #fff5f5;
        }

        .domain-name {
            font-weight: bold;
            color: #333;
        }

        .domain-stats {
            display: flex;
            gap: 15px;
            align-items: center;
        }

        .success-rate {
            padding: 5px 15px;
            border-radius: 20px;
            color: white;
            font-weight: bold;
            font-size: 0.9em;
        }

        .success-rate.high {
            background: #2196f3;
        }

        .success-rate.low {
            background: #f44336;
        }

        .success-rate.medium {
            background: #f44336;
        }

        .stats-text {
            color: #666;
            font-size: 0.9em;
        }

        .time-distribution {
            margin-top: 20px;
        }

        .time-chart {
            display: flex;
            align-items: end;
            height: 250px;
            gap: 10px;
            padding: 20px;
            border-bottom: 2px solid #ddd;
            background: #f9f9f9;
            border-radius: 8px;
        }

        .time-bar {
            flex: 1;
            background: #2196f3;
            border-radius: 6px 6px 0 0;
            display: flex;
            flex-direction: column;
            justify-content: flex-end;
            align-items: center;
            color: white;
            font-weight: bold;
            font-size: 0.85em;
            padding: 8px 4px;
            min-height: 50px;
            position: relative;
            box-shadow: 0 2px 4px rgba(33, 150, 243, 0.3);
        }

        .time-bar .count {
            margin-bottom: 5px;
            font-size: 1.1em;
        }

        .time-bar .percentage {
            font-size: 0.8em;
            opacity: 0.9;
        }

        .time-labels {
            display: flex;
            gap: 10px;
            margin-top: 15px;
            padding: 0 20px;
        }

        .time-label {
            flex: 1;
            text-align: center;
            color: #666;
            font-size: 0.9em;
            font-weight: 500;
        }

        .highlight {
            background: rgba(255, 255, 255, 0.2);
            padding: 2px 6px;
            border-radius: 4px;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div class=&amp;quot;container&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;content&amp;quot;&amp;gt;
            &amp;lt;!-- 도메인별 성공률 --&amp;gt;
            &amp;lt;div class=&amp;quot;section&amp;quot;&amp;gt;
                &amp;lt;h2&amp;gt;  도메인별 성공률 분석&amp;lt;/h2&amp;gt;
                &amp;lt;div style=&amp;quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&amp;quot;&amp;gt;
                    &amp;lt;!-- 성공 도메인 --&amp;gt;
                    &amp;lt;div&amp;gt;
                        &amp;lt;h3 style=&amp;quot;color: #2196f3; margin-bottom: 15px; font-size: 1.2em;&amp;quot;&amp;gt;✅ 성공 도메인&amp;lt;/h3&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-grid&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;biz.chosun.com&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;62/62&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate high&amp;quot;&amp;gt;100%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;biz.sbs.co.kr&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;4/4&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate high&amp;quot;&amp;gt;100%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;www.antnews.org&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;1/1&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate high&amp;quot;&amp;gt;100%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot; style=&amp;quot;font-size: 0.9em;&amp;quot;&amp;gt;www.medisobizanews.com&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;1/1&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate high&amp;quot;&amp;gt;100%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                    
                    &amp;lt;!-- 실패/부분실패 도메인 --&amp;gt;
                    &amp;lt;div&amp;gt;
                        &amp;lt;h3 style=&amp;quot;color: #f44336; margin-bottom: 15px; font-size: 1.2em;&amp;quot;&amp;gt;❌ 실패/부분실패 도메인&amp;lt;/h3&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-grid&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item failed&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;www.msn.com&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;0/23&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate low&amp;quot;&amp;gt;0%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-item partial&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;www.newstong.co.kr&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-stats&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;stats-text&amp;quot;&amp;gt;1/3&amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;success-rate medium&amp;quot;&amp;gt;33.3%&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;

            &amp;lt;!-- 실패 사이트 중심 분석 --&amp;gt;
            &amp;lt;div class=&amp;quot;section&amp;quot;&amp;gt;
                &amp;lt;h2&amp;gt;❌ 실패 사이트 심층 분석&amp;lt;/h2&amp;gt;
                &amp;lt;div style=&amp;quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&amp;quot;&amp;gt;
                    &amp;lt;div class=&amp;quot;domain-item failed&amp;quot;&amp;gt;
                        &amp;lt;div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;www.msn.com (23건 모두 실패)&amp;lt;/div&amp;gt;
                            &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                실패 원인: Empty content extracted&amp;lt;br&amp;gt;
                                &amp;lt;strong&amp;gt;분석:&amp;lt;/strong&amp;gt; 동적 콘텐츠 로딩 또는 봇 차단 가능성
                            &amp;lt;/p&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                    &amp;lt;div class=&amp;quot;domain-item partial&amp;quot;&amp;gt;
                        &amp;lt;div&amp;gt;
                            &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;www.newstong.co.kr (2건 실패)&amp;lt;/div&amp;gt;
                            &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                실패 원인: Empty content extracted&amp;lt;br&amp;gt;
                                &amp;lt;strong&amp;gt;분석:&amp;lt;/strong&amp;gt; 일부 페이지의 구조적 차이점 존재
                            &amp;lt;/p&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;

            &amp;lt;!-- 처리 시간 분포 --&amp;gt;
            &amp;lt;div class=&amp;quot;section&amp;quot;&amp;gt;
                &amp;lt;h2&amp;gt;⏱️ 처리 시간 분포&amp;lt;/h2&amp;gt;
                &amp;lt;div class=&amp;quot;time-distribution&amp;quot;&amp;gt;
                    &amp;lt;div class=&amp;quot;time-chart&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 180px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;23&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;24.5%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 60px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;4&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;4.3%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 110px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;14&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;14.9%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 200px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;30&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;31.9%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 130px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;16&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;17.0%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-bar&amp;quot; style=&amp;quot;height: 80px;&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;count&amp;quot;&amp;gt;7&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;percentage&amp;quot;&amp;gt;7.4%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                    &amp;lt;div class=&amp;quot;time-labels&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;3-4초&amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;4-5초&amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;5-6초&amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;6-7초&amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;7-8초&amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;time-label&amp;quot;&amp;gt;8초+&amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;

            &amp;lt;!-- 주요 인사이트 --&amp;gt;
            &amp;lt;div class=&amp;quot;section&amp;quot;&amp;gt;
                &amp;lt;h2&amp;gt;  주요 인사이트 및 개선 방안&amp;lt;/h2&amp;gt;
                &amp;lt;div style=&amp;quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&amp;quot;&amp;gt;
                    &amp;lt;!-- 성공/긍정적 인사이트 --&amp;gt;
                    &amp;lt;div class=&amp;quot;domain-grid&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                            &amp;lt;div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;조선비즈 가장 안정적&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                    100% 성공률로 가장 신뢰할 수 있는 스크래핑 대상
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-item success&amp;quot;&amp;gt;
                            &amp;lt;div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;처리 시간 6-7초 집중&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                    전체의 31.9%가 6-7초 구간에 분포하며 안정적 성능
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                    &amp;lt;!-- 실패/문제점 인사이트 --&amp;gt;
                    &amp;lt;div class=&amp;quot;domain-grid&amp;quot; style=&amp;quot;align-content: start;&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-item failed&amp;quot;&amp;gt;
                            &amp;lt;div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;MSN 사이트 스크래핑 불가능&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                    봇 차단 또는 JavaScript 의존적 콘텐츠 로딩으로 완전 실패
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;domain-item failed&amp;quot;&amp;gt;
                            &amp;lt;div&amp;gt;
                                &amp;lt;div class=&amp;quot;domain-name&amp;quot;&amp;gt;콘텐츠 추출 로직 개선 필요&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;color: #666; margin-top: 5px; font-size: 0.9em;&amp;quot;&amp;gt;
                                    모든 실패가 &amp;quot;Empty content extracted&amp;quot; - 추출 방식 재검토 요구
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Arial', sans-serif;
            background: white;
            min-height: 100vh;
            padding: 20px;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
        }

        .content {
            padding: 40px;
        }

        .summary-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 40px;
        }

        .summary-card {
            background: #f8f9ff;
            padding: 20px;
            border-radius: 8px;
            text-align: center;
            border-left: 4px solid #667eea;
        }

        .summary-card h3 {
            color: #667eea;
            font-size: 1.3em;
            margin-bottom: 8px;
            font-weight: bold;
        }

        .summary-card p {
            color: #333;
            font-size: 1em;
            margin: 0;
            font-weight: 500;
        }

        .section {
            margin-bottom: 40px;
        }

        .section h2 {
            color: #2196f3;
            font-size: 1.8em;
            margin-bottom: 20px;
            padding-bottom: 10px;
            border-bottom: 3px solid #2196f3;
        }

        .domain-grid {
            display: grid;
            gap: 15px;
        }

        .domain-item {
            background: #f0f8ff;
            padding: 20px;
            border-radius: 10px;
            display: flex;
            justify-content: space-between;
            align-items: center;
            border-left: 5px solid transparent;
        }

        .domain-item.success {
            border-left-color: #2196f3;
        }

        .domain-item.failed {
            border-left-color: #f44336;
            background: #fff5f5;
        }

        .domain-item.partial {
            border-left-color: #f44336;
            background: #fff5f5;
        }

        .domain-name {
            font-weight: bold;
            color: #333;
        }

        .domain-stats {
            display: flex;
            gap: 15px;
            align-items: center;
        }

        .success-rate {
            padding: 5px 15px;
            border-radius: 20px;
            color: white;
            font-weight: bold;
            font-size: 0.9em;
        }

        .success-rate.high {
            background: #2196f3;
        }

        .success-rate.low {
            background: #f44336;
        }

        .success-rate.medium {
            background: #f44336;
        }

        .stats-text {
            color: #666;
            font-size: 0.9em;
        }

        .time-distribution {
            margin-top: 20px;
        }

        .time-chart {
            display: flex;
            align-items: end;
            height: 250px;
            gap: 10px;
            padding: 20px;
            border-bottom: 2px solid #ddd;
            background: #f9f9f9;
            border-radius: 8px;
        }

        .time-bar {
            flex: 1;
            background: #2196f3;
            border-radius: 6px 6px 0 0;
            display: flex;
            flex-direction: column;
            justify-content: flex-end;
            align-items: center;
            color: white;
            font-weight: bold;
            font-size: 0.85em;
            padding: 8px 4px;
            min-height: 50px;
            position: relative;
            box-shadow: 0 2px 4px rgba(33, 150, 243, 0.3);
        }

        .time-bar .count {
            margin-bottom: 5px;
            font-size: 1.1em;
        }

        .time-bar .percentage {
            font-size: 0.8em;
            opacity: 0.9;
        }

        .time-labels {
            display: flex;
            gap: 10px;
            margin-top: 15px;
            padding: 0 20px;
        }

        .time-label {
            flex: 1;
            text-align: center;
            color: #666;
            font-size: 0.9em;
            font-weight: 500;
        }

        .highlight {
            background: rgba(255, 255, 255, 0.2);
            padding: 2px 6px;
            border-radius: 4px;
        }
    &lt;/style&gt;
&lt;div class=&quot;container&quot;&gt;
&lt;div class=&quot;content&quot;&gt;&lt;!-- 도메인별 성공률 --&gt;
&lt;div class=&quot;section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  도메인별 성공률 분석&lt;/h2&gt;
&lt;div style=&quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&quot;&gt;&lt;!-- 성공 도메인 --&gt;
&lt;div&gt;
&lt;h3 style=&quot;color: #2196f3; margin-bottom: 15px; font-size: 1.2em;&quot; data-ke-size=&quot;size23&quot;&gt;✅ 성공 도메인&lt;/h3&gt;
&lt;div class=&quot;domain-grid&quot;&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div class=&quot;domain-name&quot;&gt;biz.chosun.com&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;62/62&lt;/span&gt; &lt;span class=&quot;success-rate high&quot;&gt;100%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div class=&quot;domain-name&quot;&gt;biz.sbs.co.kr&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;4/4&lt;/span&gt; &lt;span class=&quot;success-rate high&quot;&gt;100%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div class=&quot;domain-name&quot;&gt;www.antnews.org&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;1/1&lt;/span&gt; &lt;span class=&quot;success-rate high&quot;&gt;100%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div class=&quot;domain-name&quot; style=&quot;font-size: 0.9em;&quot;&gt;www.medisobizanews.com&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;1/1&lt;/span&gt; &lt;span class=&quot;success-rate high&quot;&gt;100%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 실패/부분실패 도메인 --&gt;
&lt;div&gt;
&lt;h3 style=&quot;color: #f44336; margin-bottom: 15px; font-size: 1.2em;&quot; data-ke-size=&quot;size23&quot;&gt;❌ 실패/부분실패 도메인&lt;/h3&gt;
&lt;div class=&quot;domain-grid&quot;&gt;
&lt;div class=&quot;domain-item failed&quot;&gt;
&lt;div class=&quot;domain-name&quot;&gt;www.msn.com&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;0/23&lt;/span&gt; &lt;span class=&quot;success-rate low&quot;&gt;0%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item partial&quot;&gt;
&lt;div class=&quot;domain-name&quot;&gt;www.newstong.co.kr&lt;/div&gt;
&lt;div class=&quot;domain-stats&quot;&gt;&lt;span class=&quot;stats-text&quot;&gt;1/3&lt;/span&gt; &lt;span class=&quot;success-rate medium&quot;&gt;33.3%&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 실패 사이트 중심 분석 --&gt;
&lt;div class=&quot;section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;❌ 실패 사이트 심층 분석&lt;/h2&gt;
&lt;div style=&quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&quot;&gt;
&lt;div class=&quot;domain-item failed&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;www.msn.com (23건 모두 실패)&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;실패 원인: Empty content extracted&lt;br /&gt;&lt;b&gt;분석:&lt;/b&gt; 동적 콘텐츠 로딩 또는 봇 차단 가능성&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item partial&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;www.newstong.co.kr (2건 실패)&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;실패 원인: Empty content extracted&lt;br /&gt;&lt;b&gt;분석:&lt;/b&gt; 일부 페이지의 구조적 차이점 존재&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 처리 시간 분포 --&gt;
&lt;div class=&quot;section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;⏱️ 처리 시간 분포&lt;/h2&gt;
&lt;div class=&quot;time-distribution&quot;&gt;
&lt;div class=&quot;time-chart&quot;&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 180px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;23&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;24.5%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 60px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;4&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;4.3%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 110px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;14&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;14.9%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 200px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;30&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;31.9%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 130px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;16&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;17.0%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-bar&quot; style=&quot;height: 80px;&quot;&gt;
&lt;div class=&quot;count&quot;&gt;7&lt;/div&gt;
&lt;div class=&quot;percentage&quot;&gt;7.4%&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;time-labels&quot;&gt;
&lt;div class=&quot;time-label&quot;&gt;3-4초&lt;/div&gt;
&lt;div class=&quot;time-label&quot;&gt;4-5초&lt;/div&gt;
&lt;div class=&quot;time-label&quot;&gt;5-6초&lt;/div&gt;
&lt;div class=&quot;time-label&quot;&gt;6-7초&lt;/div&gt;
&lt;div class=&quot;time-label&quot;&gt;7-8초&lt;/div&gt;
&lt;div class=&quot;time-label&quot;&gt;8초+&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 주요 인사이트 --&gt;
&lt;div class=&quot;section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  주요 인사이트 및 개선 방안&lt;/h2&gt;
&lt;div style=&quot;display: grid; grid-template-columns: 1fr 1fr; gap: 30px;&quot;&gt;&lt;!-- 성공/긍정적 인사이트 --&gt;
&lt;div class=&quot;domain-grid&quot;&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;조선비즈 가장 안정적&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;100% 성공률로 가장 신뢰할 수 있는 스크래핑 대상&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item success&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;처리 시간 6-7초 집중&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;전체의 31.9%가 6-7초 구간에 분포하며 안정적 성능&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 실패/문제점 인사이트 --&gt;
&lt;div class=&quot;domain-grid&quot; style=&quot;align-content: start;&quot;&gt;
&lt;div class=&quot;domain-item failed&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;MSN 사이트 스크래핑 불가능&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;봇 차단 또는 JavaScript 의존적 콘텐츠 로딩으로 완전 실패&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;domain-item failed&quot;&gt;
&lt;div&gt;
&lt;div class=&quot;domain-name&quot;&gt;콘텐츠 추출 로직 개선 필요&lt;/div&gt;
&lt;p style=&quot;color: #666; margin-top: 5px; font-size: 0.9em;&quot; data-ke-size=&quot;size16&quot;&gt;모든 실패가 &quot;Empty content extracted&quot; - 추출 방식 재검토 요구&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;마치며&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Playwright와 Trafilatura를 결합한 웹 스크래핑 방식으로 기존 requests 기반 방법을 개선하였다. 94개 실패 URL을 대상으로 테스트한 결과, &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;69개(73.4%)가 성공하여 상당한 개선&lt;/span&gt;&lt;/b&gt;을 보였다. 특히 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;biz.chosun.com은 100% 성공률&lt;/span&gt;&lt;/b&gt;을 달성했으나, 여전히 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;msn 웹사이트는 실패&lt;/b&gt;&lt;/span&gt;했다. 처리 시간은 평균 5.93초로 requests 대비 약 10배 증가했지만, &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;JavaScript 기반 동적 웹사이트에서 안정적인 콘텐츠 추출이 가능&lt;/span&gt;&lt;/b&gt;해졌다.&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;분석 결과 두 가지 주요 개선사항이 도출되었다: &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;첫째, 처리 시간 단축을 위한 병렬 처리 도입&lt;/b&gt;&lt;/span&gt;이 필요하며, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;둘째, MSN 사이트에 특화된 콘텐츠 추출 로직&lt;/b&gt;&lt;/span&gt;이 필요하다. 다음 글에서는 첫 번째 문제인 처리 시간 단축을 위한 병렬 처리 도입에 대해서 다룬다.&lt;/p&gt;</description>
      <category>playwright</category>
      <category>trafilatura</category>
      <category>기사수집자동화</category>
      <category>브라우저자동화</category>
      <category>웹스크래핑</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/22</guid>
      <comments>https://catalystmind.tistory.com/22#entry22comment</comments>
      <pubDate>Fri, 13 Jun 2025 23:50:11 +0900</pubDate>
    </item>
    <item>
      <title>Trafilatura - requests - 병렬처리로 시간 단축하기</title>
      <link>https://catalystmind.tistory.com/21</link>
      <description>&lt;div id=&quot;code_1749224632475&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
&amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
    &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Worker 5개&amp;lt;/span&amp;gt; 병렬 처리로 웹 스크래핑 시간을 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;5분&amp;rarr;2분(60% 단축)&amp;lt;/span&amp;gt;, 처리량 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;2.5배 향상&amp;lt;/span&amp;gt; 달성&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;이론상 5배 개선 대비 실제 2.5배는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;50% 효율&amp;lt;/span&amp;gt;이나, 웹 스크래핑 분야에서 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;준수한 수준&amp;lt;/span&amp;gt;임&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;병목 원인은 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;네트워크 I/O, 서버 rate limiting, 시스템 오버헤드&amp;lt;/span&amp;gt; 등으로 예상됨 &amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;실용적 가치로 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;일일 처리량 2.5배 증가&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;개발 시간 60% 절약&amp;lt;/span&amp;gt;, 서비스 응답성 60% 향상 효과 예상&amp;lt;/li&amp;gt;

    &amp;lt;/ul&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Worker 5개&lt;/span&gt; 병렬 처리로 웹 스크래핑 시간을 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;5분&amp;rarr;2분(60% 단축)&lt;/span&gt;, 처리량 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;2.5배 향상&lt;/span&gt; 달성&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;이론상 5배 개선 대비 실제 2.5배는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;50% 효율&lt;/span&gt;이나, 웹 스크래핑 분야에서 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;준수한 수준&lt;/span&gt;임&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;병목 원인은 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;네트워크 I/O, 서버 rate limiting, 시스템 오버헤드&lt;/span&gt; 등으로 예상됨&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;실용적 가치로 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;일일 처리량 2.5배 증가&lt;/span&gt;, &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;개발 시간 60% 절약&lt;/span&gt;, 서비스 응답성 60% 향상 효과 예상&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;requests의 병렬화 필요성&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;trafilatura와 requests를 활용&lt;/span&gt;&lt;/b&gt;하여 500개의 기사를 순차적으로 처리할 경우, 기사 하나당 평균 처리 시간은 &lt;span style=&quot;color: #000000;&quot;&gt;0.6초로&lt;/span&gt; 전체 처리시간은 약 5분이 소요되었다. &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;분당 100개의 웹사이트 처리&lt;/span&gt;&lt;/b&gt;하는 것이 느리지는 않으나, 시간을 더 단축해보기로 했다. request도 I/O 바운드 작업이기에 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;requests + threadpoolexecutor 조합&lt;/b&gt;&lt;/span&gt;으로 병렬화를 시도했다. (병렬화에 대한 내용은 이전 글을 참고)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;figure id=&quot;og_1749215425383&quot; contenteditable=&quot;false&quot; data-ke-type=&quot;opengraph&quot; data-ke-align=&quot;alignCenter&quot; data-og-type=&quot;article&quot; data-og-title=&quot;Googlenewsdecoder - 병렬처리로 시간 단축하기&quot; data-og-description=&quot;TL;DR GoogleNewsDecoder는 I/O 바운드 작업으로 병렬 처리에 적합한 특성을 가짐 Python의 concurrent.futures 라이브러리를 활용해 ThreadPoolExecutor 기반의 병렬 처리 시스템 구현 submit()과 as_completed() 메서드를 &quot; data-og-host=&quot;catalystmind.tistory.com&quot; data-og-source-url=&quot;https://catalystmind.tistory.com/17&quot; data-og-url=&quot;https://catalystmind.tistory.com/17&quot; data-og-image=&quot;https://scrap.kakaocdn.net/dn/r2Wwl/hyY06A047F/JmlYb4PQyKeKVBgLivkW90/img.png?width=800&amp;amp;height=450&amp;amp;face=0_0_800_450,https://scrap.kakaocdn.net/dn/cdwtvI/hyY07mnUvo/nM6UaAyxuSwussSI6lF9VK/img.png?width=800&amp;amp;height=450&amp;amp;face=0_0_800_450,https://scrap.kakaocdn.net/dn/0JXe6/hyY5fJO53F/WFQ2ghi4RKPD2j2zjpVW1k/img.png?width=1115&amp;amp;height=628&amp;amp;face=0_0_1115_628&quot;&gt;&lt;a href=&quot;https://catalystmind.tistory.com/17&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot; data-source-url=&quot;https://catalystmind.tistory.com/17&quot;&gt;
&lt;div class=&quot;og-image&quot; style=&quot;background-image: url('https://scrap.kakaocdn.net/dn/r2Wwl/hyY06A047F/JmlYb4PQyKeKVBgLivkW90/img.png?width=800&amp;amp;height=450&amp;amp;face=0_0_800_450,https://scrap.kakaocdn.net/dn/cdwtvI/hyY07mnUvo/nM6UaAyxuSwussSI6lF9VK/img.png?width=800&amp;amp;height=450&amp;amp;face=0_0_800_450,https://scrap.kakaocdn.net/dn/0JXe6/hyY5fJO53F/WFQ2ghi4RKPD2j2zjpVW1k/img.png?width=1115&amp;amp;height=628&amp;amp;face=0_0_1115_628');&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;og-text&quot;&gt;
&lt;p class=&quot;og-title&quot; data-ke-size=&quot;size16&quot;&gt;Googlenewsdecoder - 병렬처리로 시간 단축하기&lt;/p&gt;
&lt;p class=&quot;og-desc&quot; data-ke-size=&quot;size16&quot;&gt;TL;DR GoogleNewsDecoder는 I/O 바운드 작업으로 병렬 처리에 적합한 특성을 가짐 Python의 concurrent.futures 라이브러리를 활용해 ThreadPoolExecutor 기반의 병렬 처리 시스템 구현 submit()과 as_completed() 메서드를&lt;/p&gt;
&lt;p class=&quot;og-host&quot; data-ke-size=&quot;size16&quot;&gt;catalystmind.tistory.com&lt;/p&gt;
&lt;/div&gt;
&lt;/a&gt;&lt;/figure&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;requests와 ThreadPoolExecutor의 통합&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;requests는&amp;nbsp; URL 요청으로 이루어진 I/O 바운드 작업이므로 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;멀티스레드&lt;/b&gt;&lt;/span&gt;를 이용한 병렬화 효과가 뛰어나다. 병렬처리를 위한 파이썬 코드는 이전 코드를 조합하여 작성하였다.(&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;전체 파이썬 스크립트 및 사용법은 아래를 참고)&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;div id=&quot;code_1749296746633&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;티스토리 코드 블록&amp;lt;/title&amp;gt;
    &amp;lt;!-- ★ 스타일 --&amp;gt;
    &amp;lt;style&amp;gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ 위아래 여백 축소 */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: 폭 제한 풀기 ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&amp;lt;/style&amp;gt;


&amp;lt;/head&amp;gt;

&amp;lt;!-- ★ 코드 박스 시작 --&amp;gt;
&amp;lt;div class=&amp;quot;code-box&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-hd&amp;quot;&amp;gt;
      request Batch Processor (w/ Trafilatura)
    &amp;lt;span class=&amp;quot;toggle-btn&amp;quot;&amp;gt;  펼치기&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-ct&amp;quot;&amp;gt;
&amp;lt;pre&amp;gt;&amp;lt;code class=&amp;quot;language-python&amp;quot;&amp;gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import requests
import time
from datetime import datetime, timedelta
import csv
import argparse
import concurrent.futures
from typing import List, Optional
import os


def guess_best_decode(data: bytes, encodings: List[str]) -&amp;gt; str:
    &amp;quot;&amp;quot;&amp;quot;Pick the decoding that yields the most Hangul characters.&amp;quot;&amp;quot;&amp;quot;
    best_text: Optional[str] = None
    best_score = -1
    for enc in encodings:
        if not enc:
            continue
        try:
            text = data.decode(enc, errors=&amp;quot;replace&amp;quot;)
        except LookupError:
            continue
        # Score by Hangul character count
        score = sum(0xAC00 &amp;lt;= ord(ch) &amp;lt;= 0xD7A3 for ch in text)
        if score &amp;gt; best_score:
            best_text, best_score = text, score
        # Early exit if score is very high (heuristic)
        if score &amp;gt; 10:
            break
    if best_text is None:
        best_text = data.decode(&amp;quot;utf-8&amp;quot;, errors=&amp;quot;replace&amp;quot;)
    return best_text

def fetch_html_with_requests(url: str, timeout: int = 10) -&amp;gt; dict:
    &amp;quot;&amp;quot;&amp;quot;
    requests로 HTML만 다운로드하는 함수 (본문 추출은 별도로 수행)
    guess_best_decode를 활용한 강력한 인코딩 자동 보정
    &amp;quot;&amp;quot;&amp;quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'requests_only'
    }

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # ✅ guess_best_decode를 통해 인코딩 복원
        candidates = [
            response.encoding,
            response.apparent_encoding,
            'utf-8',
            'euc-kr',
            'cp949'
        ]
        result['html'] = guess_best_decode(response.content, candidates)
        result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result

def process_single_url(url, index=None):
    &amp;quot;&amp;quot;&amp;quot;단일 URL 처리 함수 - 병렬처리에 최적화&amp;quot;&amp;quot;&amp;quot;
    start_time = time.time()
    result = {
        &amp;quot;index&amp;quot;: index,
        &amp;quot;url&amp;quot;: url,
        &amp;quot;status&amp;quot;: &amp;quot;failed&amp;quot;,
        &amp;quot;title&amp;quot;: None,
        &amp;quot;date&amp;quot;: None,
        &amp;quot;content&amp;quot;: None,
        &amp;quot;processing_time&amp;quot;: 0,
        &amp;quot;error_message&amp;quot;: None,
    }
    
    try:
        # HTML 다운로드
        html_result = fetch_html_with_requests(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&amp;quot;txt&amp;quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &amp;quot;status&amp;quot;: &amp;quot;success&amp;quot;,
                    &amp;quot;title&amp;quot;: metadata.title if metadata and metadata.title else &amp;quot;No title&amp;quot;,
                    &amp;quot;date&amp;quot;: str(metadata.date) if metadata and metadata.date else &amp;quot;No date&amp;quot;,
                    &amp;quot;content&amp;quot;: text[:200] + &amp;quot;...&amp;quot; if len(text) &amp;gt; 200 else text,
                })
            else:
                result[&amp;quot;error_message&amp;quot;] = &amp;quot;Empty content extracted&amp;quot;
        else:
            result[&amp;quot;error_message&amp;quot;] = f&amp;quot;Failed to download page: {html_result['error']}&amp;quot;
            
    except Exception as e:
        result[&amp;quot;error_message&amp;quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&amp;quot;processing_time&amp;quot;] = processing_time
    
    return result


def save_results_to_csv(results, output_file=None):
    &amp;quot;&amp;quot;&amp;quot;결과를 CSV 파일로 저장&amp;quot;&amp;quot;&amp;quot;
    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&amp;quot;%Y%m%d_%H%M%S&amp;quot;)
        output_file = f&amp;quot;url_processing_results_{timestamp}.csv&amp;quot;

    try:
        with open(output_file, &amp;quot;w&amp;quot;, newline=&amp;quot;&amp;quot;, encoding=&amp;quot;utf-8&amp;quot;) as csvfile:
            fieldnames = [
                &amp;quot;index&amp;quot;,
                &amp;quot;url&amp;quot;,
                &amp;quot;status&amp;quot;,
                &amp;quot;title&amp;quot;,
                &amp;quot;date&amp;quot;,
                &amp;quot;content&amp;quot;,
                &amp;quot;processing_time&amp;quot;,
                &amp;quot;error_message&amp;quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&amp;quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&amp;quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&amp;quot;❌ 결과 저장 실패: {e}&amp;quot;)
        return None


def print_summary(results, total_time, skipped_count=0, workers_used=1):
    &amp;quot;&amp;quot;&amp;quot;처리 결과 요약 출력&amp;quot;&amp;quot;&amp;quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&amp;quot;processing_time&amp;quot;] for r in results]

    # 시간 포맷팅 함수
    def format_time(seconds):
        if seconds &amp;lt; 60:
            return f&amp;quot;{seconds:.2f} seconds&amp;quot;
        else:
            return str(timedelta(seconds=round(seconds)))

    print(&amp;quot;\n&amp;quot; + &amp;quot;=&amp;quot; * 7 + &amp;quot; SUMMARY &amp;quot; + &amp;quot;=&amp;quot; * 7)
    print(f&amp;quot;Total URLs processed: {total_urls}&amp;quot;)
    print(f&amp;quot;Workers used: {workers_used} (Parallel processing: {'Enabled' if workers_used &amp;gt; 1 else 'Disabled'})&amp;quot;)
    print(f&amp;quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&amp;quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&amp;quot;\n&amp;quot; + &amp;quot;-&amp;quot; * 5 + &amp;quot; TIMING INFORMATION &amp;quot; + &amp;quot;-&amp;quot; * 5)
        print(f&amp;quot;Total processing time: {format_time(total_time)}&amp;quot;)
        print(f&amp;quot;Average processing time per URL: {format_time(avg_time)}&amp;quot;)
        print(f&amp;quot;Fastest URL processing time: {format_time(min(processing_times))}&amp;quot;)
        print(f&amp;quot;Slowest URL processing time: {format_time(max(processing_times))}&amp;quot;)

    print(&amp;quot;\nProcess completed successfully. Results saved to CSV file.&amp;quot;)


def process_urls_from_csv(csv_file_path, url_column=&amp;quot;decoded_url&amp;quot;, output_dir=None, 
                         batch_size=10, workers=1):
    &amp;quot;&amp;quot;&amp;quot;CSV 파일에서 URL들을 읽어서 병렬 처리&amp;quot;&amp;quot;&amp;quot;

    print(&amp;quot;=&amp;quot; * 50)
    print(&amp;quot;  URL 배치 처리 시작&amp;quot;)
    print(&amp;quot;=&amp;quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&amp;quot;  CSV 파일 컬럼들: {list(df.columns)}&amp;quot;)
        print(f&amp;quot;  총 행 수: {len(df)}&amp;quot;)

        if url_column not in df.columns:
            raise ValueError(f&amp;quot;Column '{url_column}' not found in CSV file&amp;quot;)

        print(f&amp;quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&amp;quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&amp;quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&amp;quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &amp;quot;news.google.com&amp;quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&amp;quot;  전체 URL: {len(all_urls)}개&amp;quot;)
        print(f&amp;quot;  처리 대상 URL (decoded URLs): {total_urls}개&amp;quot;)
        print(f&amp;quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&amp;quot;)
        print(f&amp;quot;  배치 크기: {batch_size}&amp;quot;)
        print(f&amp;quot;  작업자 수: {workers} (병렬 처리: {'활성화' if workers &amp;gt; 1 else '비활성화'})&amp;quot;)

        if urls:
            print(f&amp;quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&amp;quot;)

        print(&amp;quot;-&amp;quot; * 30)

    except Exception as e:
        print(f&amp;quot;❌ CSV 파일 읽기 실패: {e}&amp;quot;)
        return None, None

    # 출력 파일 설정
    if output_dir is None:
        output_dir = os.path.dirname(csv_file_path) or '.'
    
    timestamp = datetime.now().strftime(&amp;quot;%Y%m%d_%H%M%S&amp;quot;)
    input_filename = os.path.basename(csv_file_path).split('.')[0]
    output_filename = f&amp;quot;{input_filename}_processed_{timestamp}.csv&amp;quot;
    output_path = os.path.join(output_dir, output_filename)
    
    print(f&amp;quot;  결과 저장 경로: {output_path}&amp;quot;)

    total_start_time = time.time()
    results = []
    completed = 0

    # 병렬 처리 사용 여부에 따라 처리 방식 결정
    if workers &amp;gt; 1:
        print(f&amp;quot;  병렬 처리 모드로 실행 중...&amp;quot;)
        
        # URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)
        for batch_start in range(0, total_urls, batch_size):
            batch_end = min(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            
            print(f&amp;quot;처리 중: URLs {batch_start+1}-{batch_end} of {total_urls}...&amp;quot;)
            
            # 병렬 처리 실행
            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                # URL을 병렬로 처리 (인덱스도 함께 전달)
                future_to_data = {
                    executor.submit(process_single_url, url, batch_start + i): (url, batch_start + i) 
                    for i, url in enumerate(batch_urls)
                }
                
                # 결과 수집
                for future in concurrent.futures.as_completed(future_to_data):
                    result = future.result()
                    results.append(result)
                    completed += 1
                    
                    # 진행 상황 표시 (10개마다 또는 배치 완료 시)
                    if completed % 10 == 0 or completed == total_urls:
                        successful_in_batch = sum(1 for r in results[-len(batch_urls):] if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
                        print(f&amp;quot;진행: {completed}/{total_urls} ({completed/total_urls*100:.1f}%) &amp;quot;
                              f&amp;quot;배치 성공: {successful_in_batch}/{len(batch_urls)}&amp;quot;)
            
            # 현재까지의 결과를 CSV 파일로 저장
            save_results_to_csv(results, output_path)
            print(f&amp;quot;  {len(results)}개 결과 저장 완료&amp;quot;)
            
    else:
        print(f&amp;quot;  순차 처리 모드로 실행 중...&amp;quot;)
        
        # 직렬 처리 사용 (기존 방식)
        for i, url in enumerate(urls):
            if i % 10 == 0 or i == total_urls - 1:
                print(f&amp;quot;처리 중: URL {i+1}/{total_urls}...&amp;quot;)
            
            # 단일 URL 처리
            result = process_single_url(url, i)
            results.append(result)
            completed += 1
            
            # batch_size마다 중간 결과 저장
            if (i + 1) % batch_size == 0 or i == total_urls - 1:
                save_results_to_csv(results, output_path)
                successful_count = sum(1 for r in results if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
                print(f&amp;quot;진행: {completed}/{total_urls} ({completed/total_urls*100:.1f}%) &amp;quot;
                      f&amp;quot;성공: {successful_count}, 실패: {completed-successful_count}&amp;quot;)

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls, workers)

    return results, output_path


def main():
    # 명령줄 인자 파서 설정
    parser = argparse.ArgumentParser(description='URL content extractor with parallel processing')
    parser.add_argument('csv_file_path', nargs='?', 
                       help='Path to CSV file containing URLs to process')
    parser.add_argument('--url_column', '-c', default='decoded_url',
                       help='Name of the column containing URLs (default: decoded_url)')
    parser.add_argument('--output_dir', '-o', 
                       help='Output directory for results (default: same as input file)')
    parser.add_argument('--batch_size', '-b', type=int, default=10,
                       help='Batch size for saving interim results (default: 10)')
    parser.add_argument('--workers', '-w', type=int, default=1,
                       help='Number of worker threads for parallel processing (default: 1)')
    
    args = parser.parse_args()
    
    # 기본 파일 경로 (명령줄에서 제공되지 않은 경우)
    if args.csv_file_path is None:
        csv_file_path = r&amp;quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&amp;quot;
        print(f&amp;quot;⚠️  기본 파일 경로 사용: {csv_file_path}&amp;quot;)
    else:
        csv_file_path = args.csv_file_path
    
    # 설정 정보 출력
    print(f&amp;quot;  입력 파일: {csv_file_path}&amp;quot;)
    print(f&amp;quot;  URL 컬럼명: {args.url_column}&amp;quot;)
    print(f&amp;quot;  출력 디렉토리: {args.output_dir or '입력 파일과 동일'}&amp;quot;)
    print(f&amp;quot;  배치 크기: {args.batch_size}&amp;quot;)
    print(f&amp;quot;  작업자 수: {args.workers}&amp;quot;)
    
    # 함수 실행
    results, saved_file = process_urls_from_csv(
        csv_file_path, 
        url_column=args.url_column,
        output_dir=args.output_dir,
        batch_size=args.batch_size,
        workers=args.workers
    )

    if results:
        print(f&amp;quot;\n  저장된 파일: {saved_file}&amp;quot;)

        print(&amp;quot;\n  처리 결과 샘플:&amp;quot;)
        for i, result in enumerate(results[:3]):
            print(f&amp;quot;\n[{i+1}] {result['url'][:50]}...&amp;quot;)
            print(f&amp;quot;    상태: {result['status']}&amp;quot;)
            print(f&amp;quot;    제목: {result['title']}&amp;quot;)
            print(f&amp;quot;    처리시간: {result['processing_time']:.2f}초&amp;quot;)
            if result[&amp;quot;status&amp;quot;] == &amp;quot;failed&amp;quot;:
                print(f&amp;quot;    오류: {result['error_message']}&amp;quot;)
        
        print(f&amp;quot;\n✅ 처리 완료! 총 {len(results)}개 URL 처리됨&amp;quot;)
    else:
        print(f&amp;quot;\n❌ 처리 실패. 위의 오류 메시지를 확인하세요.&amp;quot;)


if __name__ == &amp;quot;__main__&amp;quot;:
    main()
&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-act&amp;quot;&amp;gt;
    &amp;lt;button class=&amp;quot;copy-btn&amp;quot;&amp;gt;  코드 복사&amp;lt;/button&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;!-- ★ 코드 박스 끝 --&amp;gt;

&amp;lt;!-- ★ 토글 &amp;amp; 복사 스크립트 --&amp;gt;
&amp;lt;script&amp;gt;
document.addEventListener(&amp;quot;DOMContentLoaded&amp;quot;, () =&amp;gt; {

  /* 토글 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;toggle-btn&amp;quot;)) return;
    const box = e.target.closest(&amp;quot;.code-box&amp;quot;);
    const cont = box.querySelector(&amp;quot;.code-ct&amp;quot;);
    const act = box.querySelector(&amp;quot;.code-act&amp;quot;);
    const open = cont.style.display === &amp;quot;block&amp;quot;;
    cont.style.display = act.style.display = open ? &amp;quot;none&amp;quot; : &amp;quot;block&amp;quot;;
    e.target.textContent = open ? &amp;quot;  펼치기&amp;quot; : &amp;quot;  접기&amp;quot;;
  });

  /* 복사 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;copy-btn&amp;quot;)) return;
    const btn = e.target;
    const code = btn.closest(&amp;quot;.code-box&amp;quot;).querySelector(&amp;quot;code&amp;quot;).innerText;
    if (navigator.clipboard &amp;amp;&amp;amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&amp;gt; flash(btn))
                                         .catch(() =&amp;gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&amp;gt; {
    const orig = btn.textContent;
    btn.textContent = &amp;quot;✅ 복사됨!&amp;quot;;
    setTimeout(() =&amp;gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&amp;gt; {
    const ta = document.createElement(&amp;quot;textarea&amp;quot;);
    ta.value = text;
    ta.style.position = &amp;quot;fixed&amp;quot;;
    ta.style.top = &amp;quot;-1000px&amp;quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&amp;quot;copy&amp;quot;);
      flash(btn);
    } catch {
      alert(&amp;quot;복사 실패   &amp;ndash; 브라우저가 클립보드를 차단했습니다.&amp;quot;);
    }
    document.body.removeChild(ta);
  };
});
&amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;&lt;!-- ★ 스타일 --&gt;
&lt;style&gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ reduced top/bottom margin */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      /* The terminating semicolon is required: without it the width declaration
         below is parsed as part of this value and both declarations are dropped. */
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: lift the width limit ── */
      width:100% !important;
      max-width:none !important;
    }
    /* Header bar: title on the left, toggle button on the right. */
    .code-hd {
        background: #3a4250;
        color: #fff;
        padding: 14px 20px;
        font-weight: 600;
        font-size: 14px;
        display: flex;
        justify-content: space-between;
        align-items: center;
        cursor: pointer;
        border-bottom: 1px solid #2e333f; /* dark divider under the header */
    }

    .code-hd:hover {
        background: #4b5563; /* slightly lighter grey-blue */
    }

    /* Expand / collapse button inside the header. */
    .toggle-btn {
        background: #6b7280; /* grey button background */
        color: #fff;         /* label colour */
        font-size: 13px;
        padding: 4px 10px;
        border-radius: 5px;
        font-weight: normal;
        display: inline-block;
    }

    /* Code area: hidden until toggled open. */
    .code-ct {
        display: none;
        padding: 8px 0; /* small vertical padding, no horizontal padding */
        background: #ffffff;
        font-size: 14px;
        line-height: 1.3;
        overflow-x: auto;
        white-space: pre;
        font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
        color: #383a42;
        scrollbar-width: thin;
        scrollbar-color: #d0d0d0 #f5f5f5;
    }

    /* WebKit scrollbar styling for the code area. */
    .code-ct::-webkit-scrollbar {
        height: 6px;
        background: #f5f5f5;
    }

    .code-ct::-webkit-scrollbar-track {
        background: #f5f5f5;
        border-radius: 4px;
    }

    .code-ct::-webkit-scrollbar-thumb {
        background: #d0d0d0;
        border-radius: 4px;
    }

    .code-ct::-webkit-scrollbar-thumb:hover {
        background: #b0b0b0;
    }

    /* Action bar holding the copy button: hidden until toggled open. */
    .code-act {
        display: none;
        text-align: right;
        padding: 12px 20px; /* compact row for the copy button */
        background: #fafafa;
        border-top: 1px solid #e3e3e3;
    }

    .copy-btn {
        background: #50a14f;
        color: #fff;
        border: 0;
        padding: 8px 20px;
        border-radius: 6px;
        font-size: 14px;
        cursor: pointer;
        font-weight: 500;
        transition: all 0.2s ease;
    }

    .copy-btn:hover {
        background: #40a33f;
        transform: translateY(-1px);
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }

    /* Python syntax highlighting, one-light theme. */
    .keyword  { color: #a626a4; font-weight: normal; }
    .string   { color: #50a14f; }
    .comment  { color: #a0a1a7; font-style: italic; }
    .function { color: #4078f2; }
    .number   { color: #986801; }
    .operator { color: #383a42; }
    .builtin  { color: #c18401; }


&lt;/style&gt;
&lt;!-- ★ 코드 박스 시작 --&gt;
&lt;div class=&quot;code-box&quot;&gt;
&lt;div class=&quot;code-hd&quot;&gt;  request Batch Processor (w/ Trafilatura) &lt;span class=&quot;toggle-btn&quot;&gt;  펼치기&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-ct&quot;&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import requests
import time
from datetime import datetime, timedelta
import csv
import argparse
import concurrent.futures
from typing import List, Optional
import os


def guess_best_decode(data: bytes, encodings: List[str]) -&amp;gt; str:
    &quot;&quot;&quot;Pick the decoding that yields the most Hangul characters.&quot;&quot;&quot;
    best_text: Optional[str] = None
    best_score = -1
    for enc in encodings:
        if not enc:
            continue
        try:
            text = data.decode(enc, errors=&quot;replace&quot;)
        except LookupError:
            continue
        # Score by Hangul character count
        score = sum(0xAC00 &amp;lt;= ord(ch) &amp;lt;= 0xD7A3 for ch in text)
        if score &amp;gt; best_score:
            best_text, best_score = text, score
        # Early exit if score is very high (heuristic)
        if score &amp;gt; 10:
            break
    if best_text is None:
        best_text = data.decode(&quot;utf-8&quot;, errors=&quot;replace&quot;)
    return best_text

def fetch_html_with_requests(url: str, timeout: int = 10) -&amp;gt; dict:
    &quot;&quot;&quot;
    requests로 HTML만 다운로드하는 함수 (본문 추출은 별도로 수행)
    guess_best_decode를 활용한 강력한 인코딩 자동 보정
    &quot;&quot;&quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'requests_only'
    }

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # ✅ guess_best_decode를 통해 인코딩 복원
        candidates = [
            response.encoding,
            response.apparent_encoding,
            'utf-8',
            'euc-kr',
            'cp949'
        ]
        result['html'] = guess_best_decode(response.content, candidates)
        result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result

def process_single_url(url, index=None):
    &quot;&quot;&quot;단일 URL 처리 함수 - 병렬처리에 최적화&quot;&quot;&quot;
    start_time = time.time()
    result = {
        &quot;index&quot;: index,
        &quot;url&quot;: url,
        &quot;status&quot;: &quot;failed&quot;,
        &quot;title&quot;: None,
        &quot;date&quot;: None,
        &quot;content&quot;: None,
        &quot;processing_time&quot;: 0,
        &quot;error_message&quot;: None,
    }
    
    try:
        # HTML 다운로드
        html_result = fetch_html_with_requests(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&quot;txt&quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &quot;status&quot;: &quot;success&quot;,
                    &quot;title&quot;: metadata.title if metadata and metadata.title else &quot;No title&quot;,
                    &quot;date&quot;: str(metadata.date) if metadata and metadata.date else &quot;No date&quot;,
                    &quot;content&quot;: text[:200] + &quot;...&quot; if len(text) &amp;gt; 200 else text,
                })
            else:
                result[&quot;error_message&quot;] = &quot;Empty content extracted&quot;
        else:
            result[&quot;error_message&quot;] = f&quot;Failed to download page: {html_result['error']}&quot;
            
    except Exception as e:
        result[&quot;error_message&quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&quot;processing_time&quot;] = processing_time
    
    return result


def save_results_to_csv(results, output_file=None):
    &quot;&quot;&quot;결과를 CSV 파일로 저장&quot;&quot;&quot;
    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&quot;%Y%m%d_%H%M%S&quot;)
        output_file = f&quot;url_processing_results_{timestamp}.csv&quot;

    try:
        with open(output_file, &quot;w&quot;, newline=&quot;&quot;, encoding=&quot;utf-8&quot;) as csvfile:
            fieldnames = [
                &quot;index&quot;,
                &quot;url&quot;,
                &quot;status&quot;,
                &quot;title&quot;,
                &quot;date&quot;,
                &quot;content&quot;,
                &quot;processing_time&quot;,
                &quot;error_message&quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&quot;❌ 결과 저장 실패: {e}&quot;)
        return None


def print_summary(results, total_time, skipped_count=0, workers_used=1):
    &quot;&quot;&quot;처리 결과 요약 출력&quot;&quot;&quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&quot;status&quot;] == &quot;success&quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&quot;processing_time&quot;] for r in results]

    # 시간 포맷팅 함수
    def format_time(seconds):
        if seconds &amp;lt; 60:
            return f&quot;{seconds:.2f} seconds&quot;
        else:
            return str(timedelta(seconds=round(seconds)))

    print(&quot;\n&quot; + &quot;=&quot; * 7 + &quot; SUMMARY &quot; + &quot;=&quot; * 7)
    print(f&quot;Total URLs processed: {total_urls}&quot;)
    print(f&quot;Workers used: {workers_used} (Parallel processing: {'Enabled' if workers_used &amp;gt; 1 else 'Disabled'})&quot;)
    print(f&quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&quot;\n&quot; + &quot;-&quot; * 5 + &quot; TIMING INFORMATION &quot; + &quot;-&quot; * 5)
        print(f&quot;Total processing time: {format_time(total_time)}&quot;)
        print(f&quot;Average processing time per URL: {format_time(avg_time)}&quot;)
        print(f&quot;Fastest URL processing time: {format_time(min(processing_times))}&quot;)
        print(f&quot;Slowest URL processing time: {format_time(max(processing_times))}&quot;)

    print(&quot;\nProcess completed successfully. Results saved to CSV file.&quot;)


def process_urls_from_csv(csv_file_path, url_column=&quot;decoded_url&quot;, output_dir=None, 
                         batch_size=10, workers=1):
    &quot;&quot;&quot;CSV 파일에서 URL들을 읽어서 병렬 처리&quot;&quot;&quot;

    print(&quot;=&quot; * 50)
    print(&quot;  URL 배치 처리 시작&quot;)
    print(&quot;=&quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&quot;  CSV 파일 컬럼들: {list(df.columns)}&quot;)
        print(f&quot;  총 행 수: {len(df)}&quot;)

        if url_column not in df.columns:
            raise ValueError(f&quot;Column '{url_column}' not found in CSV file&quot;)

        print(f&quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &quot;news.google.com&quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&quot;  전체 URL: {len(all_urls)}개&quot;)
        print(f&quot;  처리 대상 URL (decoded URLs): {total_urls}개&quot;)
        print(f&quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&quot;)
        print(f&quot;  배치 크기: {batch_size}&quot;)
        print(f&quot;  작업자 수: {workers} (병렬 처리: {'활성화' if workers &amp;gt; 1 else '비활성화'})&quot;)

        if urls:
            print(f&quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&quot;)

        print(&quot;-&quot; * 30)

    except Exception as e:
        print(f&quot;❌ CSV 파일 읽기 실패: {e}&quot;)
        return None, None

    # 출력 파일 설정
    if output_dir is None:
        output_dir = os.path.dirname(csv_file_path) or '.'
    
    timestamp = datetime.now().strftime(&quot;%Y%m%d_%H%M%S&quot;)
    input_filename = os.path.basename(csv_file_path).split('.')[0]
    output_filename = f&quot;{input_filename}_processed_{timestamp}.csv&quot;
    output_path = os.path.join(output_dir, output_filename)
    
    print(f&quot;  결과 저장 경로: {output_path}&quot;)

    total_start_time = time.time()
    results = []
    completed = 0

    # 병렬 처리 사용 여부에 따라 처리 방식 결정
    if workers &amp;gt; 1:
        print(f&quot;  병렬 처리 모드로 실행 중...&quot;)
        
        # URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)
        for batch_start in range(0, total_urls, batch_size):
            batch_end = min(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            
            print(f&quot;처리 중: URLs {batch_start+1}-{batch_end} of {total_urls}...&quot;)
            
            # 병렬 처리 실행
            with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
                # URL을 병렬로 처리 (인덱스도 함께 전달)
                future_to_data = {
                    executor.submit(process_single_url, url, batch_start + i): (url, batch_start + i) 
                    for i, url in enumerate(batch_urls)
                }
                
                # 결과 수집
                for future in concurrent.futures.as_completed(future_to_data):
                    result = future.result()
                    results.append(result)
                    completed += 1
                    
                    # 진행 상황 표시 (10개마다 또는 배치 완료 시)
                    if completed % 10 == 0 or completed == total_urls:
                        successful_in_batch = sum(1 for r in results[-len(batch_urls):] if r[&quot;status&quot;] == &quot;success&quot;)
                        print(f&quot;진행: {completed}/{total_urls} ({completed/total_urls*100:.1f}%) &quot;
                              f&quot;배치 성공: {successful_in_batch}/{len(batch_urls)}&quot;)
            
            # 현재까지의 결과를 CSV 파일로 저장
            save_results_to_csv(results, output_path)
            print(f&quot;  {len(results)}개 결과 저장 완료&quot;)
            
    else:
        print(f&quot;  순차 처리 모드로 실행 중...&quot;)
        
        # 직렬 처리 사용 (기존 방식)
        for i, url in enumerate(urls):
            if i % 10 == 0 or i == total_urls - 1:
                print(f&quot;처리 중: URL {i+1}/{total_urls}...&quot;)
            
            # 단일 URL 처리
            result = process_single_url(url, i)
            results.append(result)
            completed += 1
            
            # batch_size마다 중간 결과 저장
            if (i + 1) % batch_size == 0 or i == total_urls - 1:
                save_results_to_csv(results, output_path)
                successful_count = sum(1 for r in results if r[&quot;status&quot;] == &quot;success&quot;)
                print(f&quot;진행: {completed}/{total_urls} ({completed/total_urls*100:.1f}%) &quot;
                      f&quot;성공: {successful_count}, 실패: {completed-successful_count}&quot;)

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls, workers)

    return results, output_path


def main():
    # Command-line entry point: parse the options, run the URL extraction
    # over the input CSV, then print where the results were saved and a
    # short sample of them.
    #
    # Options:
    #   csv_file_path  optional positional; a hard-coded sample path is
    #                  used when it is omitted (see below)
    #   --url_column   name of the CSV column that holds the URLs
    #   --output_dir   where results are written; left as None when omitted
    #   --batch_size   how often interim results are saved
    #   --workers      number of worker threads
    #
    # NOTE(review): batch_size is not validated. The sequential path uses it
    # as a modulo divisor when deciding to save interim results, so
    # --batch_size 0 would raise ZeroDivisionError there.

    # Set up the command-line argument parser
    parser = argparse.ArgumentParser(description='URL content extractor with parallel processing')
    parser.add_argument('csv_file_path', nargs='?', 
                       help='Path to CSV file containing URLs to process')
    parser.add_argument('--url_column', '-c', default='decoded_url',
                       help='Name of the column containing URLs (default: decoded_url)')
    parser.add_argument('--output_dir', '-o', 
                       help='Output directory for results (default: same as input file)')
    parser.add_argument('--batch_size', '-b', type=int, default=10,
                       help='Batch size for saving interim results (default: 10)')
    parser.add_argument('--workers', '-w', type=int, default=1,
                       help='Number of worker threads for parallel processing (default: 1)')
    
    args = parser.parse_args()
    
    # Fall back to a default file path when none was given on the command line.
    # NOTE(review): this default is an absolute path on the author's Windows
    # machine; on any other machine the CSV path has to be passed explicitly.
    if args.csv_file_path is None:
        csv_file_path = r&quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&quot;
        print(f&quot;⚠️  기본 파일 경로 사용: {csv_file_path}&quot;)
    else:
        csv_file_path = args.csv_file_path
    
    # Print the effective settings before starting
    print(f&quot;  입력 파일: {csv_file_path}&quot;)
    print(f&quot;  URL 컬럼명: {args.url_column}&quot;)
    print(f&quot;  출력 디렉토리: {args.output_dir or '입력 파일과 동일'}&quot;)
    print(f&quot;  배치 크기: {args.batch_size}&quot;)
    print(f&quot;  작업자 수: {args.workers}&quot;)
    
    # Run the extraction; the return value is unpacked into the list of
    # per-URL results and the path of the saved output file
    results, saved_file = process_urls_from_csv(
        csv_file_path, 
        url_column=args.url_column,
        output_dir=args.output_dir,
        batch_size=args.batch_size,
        workers=args.workers
    )

    # An empty or None result is reported as a failure
    if results:
        print(f&quot;\n  저장된 파일: {saved_file}&quot;)

        # Show at most the first three results as a sample. Each result is
        # indexed by 'url', 'status', 'title' and 'processing_time';
        # 'error_message' is read only for entries whose status is failed.
        print(&quot;\n  처리 결과 샘플:&quot;)
        for i, result in enumerate(results[:3]):
            print(f&quot;\n[{i+1}] {result['url'][:50]}...&quot;)
            print(f&quot;    상태: {result['status']}&quot;)
            print(f&quot;    제목: {result['title']}&quot;)
            print(f&quot;    처리시간: {result['processing_time']:.2f}초&quot;)
            if result[&quot;status&quot;] == &quot;failed&quot;:
                print(f&quot;    오류: {result['error_message']}&quot;)
        
        print(f&quot;\n✅ 처리 완료! 총 {len(results)}개 URL 처리됨&quot;)
    else:
        print(f&quot;\n❌ 처리 실패. 위의 오류 메시지를 확인하세요.&quot;)


# Run main() only when the file is executed as a script, not when imported
if __name__ == &quot;__main__&quot;:
    main()
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div class=&quot;code-act&quot;&gt;&lt;button class=&quot;copy-btn&quot;&gt;  코드 복사&lt;/button&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;!-- ★ 코드 박스 끝 --&gt; &lt;!-- ★ 토글 &amp; 복사 스크립트 --&gt;
&lt;script&gt;
// Wires up the expand/collapse and copy buttons of the code boxes once the
// DOM is ready. Both click listeners are attached to document.body and filter
// on the class of the clicked element, so one listener serves every code box.
document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {

  /* Toggle: show or hide the code area and the action bar of the clicked box */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    // Ignore clicks that did not land on a toggle button
    if (!e.target.classList.contains(&quot;toggle-btn&quot;)) return;
    const box = e.target.closest(&quot;.code-box&quot;);
    const cont = box.querySelector(&quot;.code-ct&quot;);
    const act = box.querySelector(&quot;.code-act&quot;);
    // Only an inline display of exactly block counts as open
    const open = cont.style.display === &quot;block&quot;;
    // Content and action bar always switch together
    cont.style.display = act.style.display = open ? &quot;none&quot; : &quot;block&quot;;
    // Relabel the button with the action it will perform next
    e.target.textContent = open ? &quot;  펼치기&quot; : &quot;  접기&quot;;
  });

  /* Copy: put the text of the code element of the clicked box on the clipboard */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    // Ignore clicks that did not land on a copy button
    if (!e.target.classList.contains(&quot;copy-btn&quot;)) return;
    const btn = e.target;
    const code = btn.closest(&quot;.code-box&quot;).querySelector(&quot;code&quot;).innerText;
    // Use the async Clipboard API when it exists and the page is a secure
    // context; otherwise, or when writeText rejects, use the textarea fallback
    if (navigator.clipboard &amp;&amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&gt; flash(btn))
                                         .catch(() =&gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  // flash and fallback are declared after the listeners that call them; this
  // works because they are only invoked later, when a click happens.

  // Show a success label on the button for two seconds, then restore it
  const flash = btn =&gt; {
    const orig = btn.textContent;
    btn.textContent = &quot;✅ 복사됨!&quot;;
    setTimeout(() =&gt; (btn.textContent = orig), 2000);
  };

  // Fallback copy: place the text in a temporary off-screen textarea, select
  // it, and issue the copy command; the textarea is removed afterwards
  const fallback = (text, btn) =&gt; {
    const ta = document.createElement(&quot;textarea&quot;);
    ta.value = text;
    // Fixed position far above the viewport keeps the textarea out of sight
    ta.style.position = &quot;fixed&quot;;
    ta.style.top = &quot;-1000px&quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      // NOTE(review): the boolean returned by execCommand is not checked, so
      // the success label may appear even if nothing was copied - confirm
      document.execCommand(&quot;copy&quot;);
      flash(btn);
    } catch {
      alert(&quot;복사 실패   – 브라우저가 클립보드를 차단했습니다.&quot;);
    }
    document.body.removeChild(ta);
  };
});
&lt;/script&gt;
&lt;/div&gt;
&lt;pre id=&quot;code_1749297013377&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;python script.py input.csv --workers 5 --batch_size 20 --url_column &quot;link&quot; --output_dir &quot;./results&quot;&lt;/code&gt;&lt;/pre&gt;
&lt;div id=&quot;code_1749297026216&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div id=&amp;quot;parameter-options&amp;quot; style=&amp;quot;margin-top: 0px; padding-top: 0;&amp;quot;&amp;gt;
  &amp;lt;div style=&amp;quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&amp;quot;&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;input.csv&amp;lt;/span&amp;gt;: 처리할 CSV 파일 경로 (필수 파라미터)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--workers 5&amp;lt;/span&amp;gt;: 병렬 처리 스레드 수 (기본값: 1, 추천: CPU 코어 수의 1-2배)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--batch_size 20&amp;lt;/span&amp;gt;: 중간 저장 단위 (기본값: 10, 대용량 처리 시 50-100 권장)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--url_column &amp;quot;link&amp;quot;&amp;lt;/span&amp;gt;: URL이 들어있는 컬럼명 (기본값: &amp;quot;decoded_url&amp;quot;)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--output_dir &amp;quot;./results&amp;quot;&amp;lt;/span&amp;gt;: 결과 파일 저장 디렉토리 (기본값: 입력 파일과 동일한 폴더)
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div id=&quot;parameter-options&quot; style=&quot;margin-top: 0px; padding-top: 0;&quot;&gt;
&lt;div style=&quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&quot;&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;input.csv&lt;/span&gt;: 처리할 CSV 파일 경로 (필수 파라미터)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--workers 5&lt;/span&gt;: 병렬 처리 스레드 수 (기본값: 1, 추천: CPU 코어 수의 1-2배)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--batch_size 20&lt;/span&gt;: 중간 저장 단위 (기본값: 10, 대용량 처리 시 50-100 권장)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--url_column &quot;link&quot;&lt;/span&gt;: URL이 들어있는 컬럼명 (기본값: &quot;decoded_url&quot;)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--output_dir &quot;./results&quot;&lt;/span&gt;: 결과 파일 저장 디렉토리 (기본값: 입력 파일과 동일한 폴더)&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Xhayj/btsOrwwUIRi/GWELPbPqiGwdjklFstuyf0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Xhayj/btsOrwwUIRi/GWELPbPqiGwdjklFstuyf0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Xhayj/btsOrwwUIRi/GWELPbPqiGwdjklFstuyf0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FXhayj%2FbtsOrwwUIRi%2FGWELPbPqiGwdjklFstuyf0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  성능 평가 및 결과 해석&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;병렬 처리를 적용한 결과, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;개당 평균 처리 시간은 0.61 &amp;rarr; 0.79초로 증가&lt;/b&gt;&lt;/span&gt;하였으나, 병렬 처리 효과로 인해 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;전체 처리 시간은 60% 감소&lt;/b&gt;&lt;/span&gt;하였다.&lt;/p&gt;
&lt;div id=&quot;code_1749298051623&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;style&amp;gt;
        .performance-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .performance-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .performance-table th:first-child {
            width: 25%;
        }
        
        .performance-table th:nth-child(2),
        .performance-table th:nth-child(3) {
            width: 25%;
        }
        
        .performance-table th:nth-child(4) {
            width: 25%;
        }
        
        .performance-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .performance-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .performance-table td {
            border: 1px solid #ddd;
            padding: 14px 18px;
            vertical-align: middle;
            line-height: 1.5;
            text-align: center;
            word-break: keep-all;
        }
        
        .metric-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
        }
        
        .better-value {
            color: #2e7d32;
            font-weight: bold;
        }
        
        .worse-value {
            color: #c62828;
        }
        
        .neutral-value {
            color: #0277bd;
        }
        
        .comparison-result {
            font-style: italic;
            color: #555;
            font-weight: 500;
        }
        
        .improvement {
            color: #2e7d32;
            font-weight: bold;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;table class=&amp;quot;performance-table&amp;quot;&amp;gt;
        &amp;lt;thead&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;th&amp;gt;성능 지표&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;순차 처리&amp;lt;br&amp;gt;(Sequential)&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;병렬 처리&amp;lt;br&amp;gt;(Parallel)&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;비교 결과&amp;lt;/th&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/thead&amp;gt;
        &amp;lt;tbody&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;총 처리 시간&amp;lt;br&amp;gt;&amp;lt;small&amp;gt;(500개 기사 수집)&amp;lt;/small&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;worse-value&amp;quot;&amp;gt;5분 05초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;1분 59초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result improvement&amp;quot;&amp;gt;⏱ 60% 감소&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;평균 개별&amp;lt;br&amp;gt;처리 시간&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;0.61초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;0.79초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;  30% 증가&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;전체 처리&amp;lt;br&amp;gt;효율성&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;worse-value&amp;quot;&amp;gt;낮음&amp;lt;br&amp;gt;(순차 대기)&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;높음&amp;lt;br&amp;gt;(동시 처리)&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result improvement&amp;quot;&amp;gt;2.5배 향상&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;성공률&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;81.4%&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;81.2%&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;동등 수준&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;사용된&amp;lt;br&amp;gt;작업자 수&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;1개&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;5개&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;병렬 처리 활용&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/tbody&amp;gt;
    &amp;lt;/table&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        .performance-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .performance-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .performance-table th:first-child {
            width: 25%;
        }
        
        .performance-table th:nth-child(2),
        .performance-table th:nth-child(3) {
            width: 25%;
        }
        
        .performance-table th:nth-child(4) {
            width: 25%;
        }
        
        .performance-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .performance-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .performance-table td {
            border: 1px solid #ddd;
            padding: 14px 18px;
            vertical-align: middle;
            line-height: 1.5;
            text-align: center;
            word-break: keep-all;
        }
        
        .metric-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
        }
        
        .better-value {
            color: #2e7d32;
            font-weight: bold;
        }
        
        .worse-value {
            color: #c62828;
        }
        
        .neutral-value {
            color: #0277bd;
        }
        
        .comparison-result {
            font-style: italic;
            color: #555;
            font-weight: 500;
        }
        
        .improvement {
            color: #2e7d32;
            font-weight: bold;
        }
    &lt;/style&gt;
&lt;table class=&quot;performance-table&quot;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;성능 지표&lt;/th&gt;
&lt;th&gt;순차 처리&lt;br /&gt;(Sequential)&lt;/th&gt;
&lt;th&gt;병렬 처리&lt;br /&gt;(Parallel)&lt;/th&gt;
&lt;th&gt;비교 결과&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;총 처리 시간&lt;br /&gt;&lt;small&gt;(500개 기사 수집)&lt;/small&gt;&lt;/td&gt;
&lt;td class=&quot;worse-value&quot;&gt;5분 05초&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;1분 59초&lt;/td&gt;
&lt;td class=&quot;comparison-result improvement&quot;&gt;⏱ 60% 감소&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;평균 개별&lt;br /&gt;처리 시간&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;0.61초&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;0.79초&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;  30% 증가&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;전체 처리&lt;br /&gt;효율성&lt;/td&gt;
&lt;td class=&quot;worse-value&quot;&gt;낮음&lt;br /&gt;(순차 대기)&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;높음&lt;br /&gt;(동시 처리)&lt;/td&gt;
&lt;td class=&quot;comparison-result improvement&quot;&gt;2.5배 향상&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;성공률&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;81.4%&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;81.2%&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;동등 수준&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;사용된&lt;br /&gt;작업자 수&lt;/td&gt;
&lt;td&gt;1개&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;5개&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;병렬 처리 활용&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1749297762055&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;border-left: 4px solid #3b82f6; padding-left: 16px; margin: 20px 0; background-color: #f0f8ff; color: #333; line-height: 1.6;&amp;quot;&amp;gt;
  &amp;lt;p style=&amp;quot;margin-top: 0;&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;스레드 수를 5개로 늘렸지만 처리 시간이 정확히 1/5로 줄지는 않았는데, 그 이유는 다음과 같은 요인들 때문일 것으로 추정:&amp;lt;/strong&amp;gt;&amp;lt;/p&amp;gt;
  &amp;lt;ul style=&amp;quot;padding-left: 20px; margin-top: 8px; margin-bottom: 8px;&amp;quot;&amp;gt;
    &amp;lt;li&amp;gt;&amp;lt;strong&amp;gt;네트워크 지연:&amp;lt;/strong&amp;gt; 전체 작업 시간 중 대부분이 요청/응답 대기일 수 있음&amp;lt;/li&amp;gt;
    &amp;lt;li&amp;gt;&amp;lt;strong&amp;gt;서버 반응 속도 제한:&amp;lt;/strong&amp;gt; 일부 서버는 동시 요청을 제한하거나 봇으로 인식해 지연 응답할 가능성&amp;lt;/li&amp;gt;
    &amp;lt;li&amp;gt;&amp;lt;strong&amp;gt;스레드 오버헤드:&amp;lt;/strong&amp;gt; 스레드 관리 및 메모리 할당 등의 부하가 효율을 일부 상쇄했을 수 있음&amp;lt;/li&amp;gt;
  &amp;lt;/ul&amp;gt;
  &amp;lt;p style=&amp;quot;margin-bottom: 0; font-style: italic; color: #555;&amp;quot;&amp;gt;
    ※ 실제 병목 원인은 작업 대상의 구조와 환경에 따라 달라질 수 있으며, 위 내용은 일반적인 웹 크롤링 병렬 처리에서 자주 관찰되는 패턴임
  &amp;lt;/p&amp;gt;
&amp;lt;/div&amp;gt;
&quot;&gt;
&lt;div style=&quot;border-left: 4px solid #3b82f6; padding-left: 16px; margin: 20px 0; background-color: #f0f8ff; color: #333; line-height: 1.6;&quot;&gt;
&lt;p style=&quot;margin-top: 0;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;스레드 수를 5개로 늘렸지만 처리 시간이 정확히 1/5로 줄지는 않았는데, 그 이유는 다음과 같은 요인들 때문일 것으로 추정:&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;padding-left: 20px; margin-top: 8px; margin-bottom: 8px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;네트워크 지연:&lt;/b&gt; 전체 작업 시간 중 대부분이 요청/응답 대기일 수 있음&lt;/li&gt;
&lt;li&gt;&lt;b&gt;서버 반응 속도 제한:&lt;/b&gt; 일부 서버는 동시 요청을 제한하거나 봇으로 인식해 지연 응답할 가능성&lt;/li&gt;
&lt;li&gt;&lt;b&gt;스레드 오버헤드:&lt;/b&gt; 스레드 관리 및 메모리 할당 등의 부하가 효율을 일부 상쇄했을 수 있음&lt;/li&gt;
&lt;/ul&gt;
&lt;p style=&quot;margin-bottom: 0; font-style: italic; color: #555;&quot; data-ke-size=&quot;size16&quot;&gt;※ 실제 병목 원인은 작업 대상의 구조와 환경에 따라 달라질 수 있으며, 위 내용은 일반적인 웹 크롤링 병렬 처리에서 자주 관찰되는 패턴임&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div&gt;&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;마무리&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt; 웹 크롤링 작업에서 requests와 ThreadPoolExecutor를 활용해 실행 시간을 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;5분에서 2분으로 단축&lt;/b&gt;&lt;/span&gt;하는 데 성공했다. 간단한 병렬화 코드만 추가하여 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;60% 이상의 시간 절약&lt;/b&gt;&lt;/span&gt;, 그리고 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;2.5배 처리 속도 향상&lt;/b&gt;&lt;/span&gt;이라는 결과를 얻을 수 있었다. 다음 글에서는 requests에서 HTML 수집에 실패한 동적 웹사이트에서 정보를 가져오는 방법을 다룰 예정이다. 주로 자바스크립트를 이용한 &lt;span&gt;동적 웹사이트에서 HTML을 수집하기 위해서 Playwright와 같은 브라우저 자동화를 적용할 예정이다.&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;/div&gt;</description>
      <category>Requests</category>
      <category>ThreadPoolExecutor</category>
      <category>기사수집자동화</category>
      <category>병렬처리</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/21</guid>
      <comments>https://catalystmind.tistory.com/21#entry21comment</comments>
      <pubDate>Sat, 7 Jun 2025 21:20:54 +0900</pubDate>
    </item>
    <item>
      <title>Trafilatura - requests를 이용한 한글 깨짐 문제 해결하기</title>
      <link>https://catalystmind.tistory.com/20</link>
      <description>&lt;div id=&quot;code_1748863662763&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
    &amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
        &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
            &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Trafilatura&amp;lt;/span&amp;gt;를 이용한 한글 뉴스 기사 본문 자동 수집 시 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;인코딩 문제&amp;lt;/span&amp;gt;가 간헐적으로 발생함&amp;lt;/li&amp;gt;
            &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;한글 뉴스 기사의 약 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;80%&amp;lt;/span&amp;gt;는 정적인 HTML 페이지로 구성되어 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;requests&amp;lt;/span&amp;gt;로 수집 가능함&amp;lt;/li&amp;gt;
            &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;requests&amp;lt;/span&amp;gt;로 HTML을 받아서 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;trafilatura&amp;lt;/span&amp;gt;로 넘기면 인코딩 문제 회피하면서도 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;빠른 처리 속도&amp;lt;/span&amp;gt; 유지 가능함&amp;lt;/li&amp;gt;
            &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Trafilatura + requests&amp;lt;/span&amp;gt; 조합 테스트 결과, 인코딩 처리를 위해 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;0.52초 &amp;rarr; 0.61초&amp;lt;/span&amp;gt;로 다소 증가하였으나 인코딩 문제를 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;완전히 해결&amp;lt;/span&amp;gt;&amp;lt;/li&amp;gt;
            &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;이 방식을 통해 한글 뉴스 기사 수집의 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;안정성&amp;lt;/span&amp;gt;과 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;효율성&amp;lt;/span&amp;gt;을 동시에 확보할 수 있음&amp;lt;/li&amp;gt;
        &amp;lt;/ul&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Trafilatura&lt;/span&gt;를 이용한 한글 뉴스 기사 본문 자동 수집 시 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;인코딩 문제&lt;/span&gt;가 간헐적으로 발생함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;한글 뉴스 기사의 약 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;80%&lt;/span&gt;는 정적인 HTML 페이지로 구성되어 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;requests&lt;/span&gt;로 수집 가능함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;requests&lt;/span&gt;로 HTML을 받아서 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;trafilatura&lt;/span&gt;로 넘기면 인코딩 문제 회피하면서도 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;빠른 처리 속도&lt;/span&gt; 유지 가능함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Trafilatura + requests&lt;/span&gt; 조합 테스트 결과, 인코딩 처리를 위해 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;0.52초 &amp;rarr; 0.61초&lt;/span&gt;로 다소 증가하였으나 인코딩 문제를 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;완전히 해결&lt;/span&gt;&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;이 방식을 통해 한글 뉴스 기사 수집의 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;안정성&lt;/span&gt;과 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;효율성&lt;/span&gt;을 동시에 확보할 수 있음&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-end=&quot;186&quot; data-start=&quot;90&quot; data-ke-size=&quot;size16&quot;&gt;한국어 뉴스를 스크랩할 때 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;한글 문자가 깨지는 현상&lt;/b&gt;&lt;/span&gt;이 종종 발생한다. 이전 글에서 소개한 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Trafilatura&lt;/b&gt;&lt;/span&gt;가 자동으로 문자를 변환해주지만 완벽하지는 않다.&amp;nbsp;이번 글에서는 이 문제를 해결하기 위해, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;requests로 HTML을 받아와 Trafilatura로 넘겨주는 방식&lt;/b&gt;&lt;/span&gt;을 다룬다. 특히, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;requests&lt;/b&gt;&lt;/span&gt;가 자체적으로 인코딩 문제를 완벽히 처리하지 못하므로, 이를 보완하는 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;별도의 문자 변환 함수&lt;/b&gt;&lt;/span&gt;도 소개한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1748873709140&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
  &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
  &amp;lt;title&amp;gt;requests 인코딩 처리 흐름&amp;lt;/title&amp;gt;
  &amp;lt;style&amp;gt;
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
      line-height: 1.6;
      color: #333;
      width: 100%;
      margin: 0;
      padding: 15px;
      background: #fff;
      box-sizing: border-box;
    }

    h1 {
      font-size: 1.3rem !important;
      color: #2d3748 !important;
      margin-bottom: 25px !important;
      text-align: center !important;
      font-weight: 600 !important;
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif !important;
    }

    .flow {
      display: flex;
      flex-direction: column;
      gap: 20px;
      position: relative;
    }

    .step {
      border: 1px solid #e2e8f0;
      border-radius: 8px;
      border-left: 4px solid #4a5568;
      padding: 15px;
      background: #fff;
      position: relative;
      margin-left: 54px;
    }

    .step-number {
      position: absolute;
      left: -52px;
      top: 15px;
      width: 32px;
      height: 32px;
      background: #4a5568;
      color: white;
      display: flex;
      align-items: center;
      justify-content: center;
      font-size: 0.9rem;
      font-weight: 600;
      border-radius: 4px;
      z-index: 2;
    }

    .step-content {
      width: 100%;
    }

    .step-title {
      font-size: 1.1rem;
      font-weight: 600;
      color: #2d3748;
      margin-bottom: 8px;
    }

    .step-description {
      color: #4a5568;
      font-size: 0.95rem;
    }

    .code {
      background: #f7fafc;
      padding: 2px 6px;
      border-radius: 4px;
      font-family: 'Fira Code', Consolas, monospace;
      font-size: 0.9rem;
      color: #2d3748;
      border: 1px solid #e2e8f0;
    }

    .code-list {
      margin: 12px 0 0 0;
      padding-left: 0;
    }

    .code-list li {
      list-style: none;
      margin: 8px 0;
      padding: 10px 12px;
      background: #f7fafc;
      border-radius: 4px;
      border-left: 3px solid #4a5568;
      font-family: 'Fira Code', Consolas, monospace;
      font-size: 0.85rem;
    }

    @media (max-width: 600px) {
      body {
        padding: 10px;
      }
      
      h1 {
        font-size: 1.3rem;
      }
      
      .flow::before {
        left: -26px;
      }
      
      .step {
        margin-left: 54px;
        padding: 12px;
      }
      
      .step-number {
        left: -52px;
        width: 28px;
        height: 28px;
        font-size: 0.85rem;
      }
      
      .step-title {
        font-size: 1rem;
      }
      
      .step-description {
        font-size: 0.9rem;
      }
    }
  &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
  &amp;lt;h1&amp;gt;웹 크롤링 시 한글 처리 흐름&amp;lt;/h1&amp;gt;
  
  &amp;lt;div class=&amp;quot;flow&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;step&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;step-number&amp;quot;&amp;gt;1&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-title&amp;quot;&amp;gt;HTTP 요청 및 응답 수신&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-description&amp;quot;&amp;gt;
          &amp;lt;code class=&amp;quot;code&amp;quot;&amp;gt;requests&amp;lt;/code&amp;gt;로 서버에 요청을 보내고 &amp;lt;code class=&amp;quot;code&amp;quot;&amp;gt;response.content&amp;lt;/code&amp;gt;로 HTML 데이터를 받음
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;

    &amp;lt;div class=&amp;quot;step&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;step-number&amp;quot;&amp;gt;2&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-title&amp;quot;&amp;gt;서버 문자 정보의 한계&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-description&amp;quot;&amp;gt;
          서버가 보낸 문자 방식을 잘못 변환하는 경우가 있음&amp;lt;br&amp;gt;
          예: 실제는 &amp;lt;code class=&amp;quot;code&amp;quot;&amp;gt;EUC-KR&amp;lt;/code&amp;gt;인데 &amp;lt;code class=&amp;quot;code&amp;quot;&amp;gt;UTF-8&amp;lt;/code&amp;gt;로 변환
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;

    &amp;lt;div class=&amp;quot;step&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;step-number&amp;quot;&amp;gt;3&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-title&amp;quot;&amp;gt;텍스트 변환 방법&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-description&amp;quot;&amp;gt;
          HTML 데이터를 올바른 텍스트로 변환하는 것이 핵심:
          &amp;lt;ul class=&amp;quot;code-list&amp;quot;&amp;gt;
            &amp;lt;li&amp;gt;response.encoding &amp;rarr; requests의 자동 해석, 부정확할 수 있음&amp;lt;/li&amp;gt;
            &amp;lt;li&amp;gt;'euc-kr','utf-8','cp949'등을 명시 &amp;rarr; 지정한 방식으로 변환&amp;lt;/li&amp;gt;
            &amp;lt;li&amp;gt;guess_best_decode() &amp;rarr; 한글 비율로 최적 방식 추정&amp;lt;/li&amp;gt;
          &amp;lt;/ul&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;

    &amp;lt;div class=&amp;quot;step&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;step-number&amp;quot;&amp;gt;4&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-title&amp;quot;&amp;gt;정상 텍스트 처리&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-description&amp;quot;&amp;gt;
          올바른 변환으로 한글 깨짐 없는 텍스트를 얻음&amp;lt;br&amp;gt;
          이후 &amp;lt;code class=&amp;quot;code&amp;quot;&amp;gt;trafilatura&amp;lt;/code&amp;gt; 등으로 본문 추출이 가능
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
      line-height: 1.6;
      color: #333;
      width: 100%;
      margin: 0;
      padding: 15px;
      background: #fff;
      box-sizing: border-box;
    }

    h1 {
      font-size: 1.3rem !important;
      color: #2d3748 !important;
      margin-bottom: 25px !important;
      text-align: center !important;
      font-weight: 600 !important;
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif !important;
    }

    .flow {
      display: flex;
      flex-direction: column;
      gap: 20px;
      position: relative;
    }

    .step {
      border: 1px solid #e2e8f0;
      border-radius: 8px;
      border-left: 4px solid #4a5568;
      padding: 15px;
      background: #fff;
      position: relative;
      margin-left: 54px;
    }

    .step-number {
      position: absolute;
      left: -52px;
      top: 15px;
      width: 32px;
      height: 32px;
      background: #4a5568;
      color: white;
      display: flex;
      align-items: center;
      justify-content: center;
      font-size: 0.9rem;
      font-weight: 600;
      border-radius: 4px;
      z-index: 2;
    }

    .step-content {
      width: 100%;
    }

    .step-title {
      font-size: 1.1rem;
      font-weight: 600;
      color: #2d3748;
      margin-bottom: 8px;
    }

    .step-description {
      color: #4a5568;
      font-size: 0.95rem;
    }

    .code {
      background: #f7fafc;
      padding: 2px 6px;
      border-radius: 4px;
      font-family: 'Fira Code', Consolas, monospace;
      font-size: 0.9rem;
      color: #2d3748;
      border: 1px solid #e2e8f0;
    }

    .code-list {
      margin: 12px 0 0 0;
      padding-left: 0;
    }

    .code-list li {
      list-style: none;
      margin: 8px 0;
      padding: 10px 12px;
      background: #f7fafc;
      border-radius: 4px;
      border-left: 3px solid #4a5568;
      font-family: 'Fira Code', Consolas, monospace;
      font-size: 0.85rem;
    }

    @media (max-width: 600px) {
      body {
        padding: 10px;
      }
      
      h1 {
        font-size: 1.3rem;
      }
      
      .flow::before {
        left: -26px;
      }
      
      .step {
        margin-left: 54px;
        padding: 12px;
      }
      
      .step-number {
        left: -52px;
        width: 28px;
        height: 28px;
        font-size: 0.85rem;
      }
      
      .step-title {
        font-size: 1rem;
      }
      
      .step-description {
        font-size: 0.9rem;
      }
    }
  &lt;/style&gt;
&lt;h1&gt;웹 크롤링 시 한글 처리 흐름&lt;/h1&gt;
&lt;div class=&quot;flow&quot;&gt;
&lt;div class=&quot;step&quot;&gt;
&lt;div class=&quot;step-number&quot;&gt;1&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title&quot;&gt;HTTP 요청 및 응답 수신&lt;/div&gt;
&lt;div class=&quot;step-description&quot;&gt;&lt;code class=&quot;code&quot;&gt;requests&lt;/code&gt;로 서버에 요청을 보내고 &lt;code class=&quot;code&quot;&gt;response.content&lt;/code&gt;로 HTML 데이터를 받음&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;step&quot;&gt;
&lt;div class=&quot;step-number&quot;&gt;2&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title&quot;&gt;서버 문자 정보의 한계&lt;/div&gt;
&lt;div class=&quot;step-description&quot;&gt;서버가 보낸 문자 방식을 잘못 변환하는 경우가 있음&lt;br /&gt;예: 실제는 &lt;code class=&quot;code&quot;&gt;EUC-KR&lt;/code&gt;인데 &lt;code class=&quot;code&quot;&gt;UTF-8&lt;/code&gt;로 변환&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;step&quot;&gt;
&lt;div class=&quot;step-number&quot;&gt;3&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title&quot;&gt;텍스트 변환 방법&lt;/div&gt;
&lt;div class=&quot;step-description&quot;&gt;HTML 데이터를 올바른 텍스트로 변환하는 것이 핵심:
&lt;ul class=&quot;code-list&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;response.encoding &amp;rarr; requests의 자동 해석, 부정확할 수 있음&lt;/li&gt;
&lt;li&gt;'euc-kr','utf-8','cp949'등을 명시 &amp;rarr; 지정한 방식으로 변환&lt;/li&gt;
&lt;li&gt;guess_best_decode() &amp;rarr; 한글 비율로 최적 방식 추정&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;step&quot;&gt;
&lt;div class=&quot;step-number&quot;&gt;4&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title&quot;&gt;정상 텍스트 처리&lt;/div&gt;
&lt;div class=&quot;step-description&quot;&gt;올바른 변환으로 한글 깨짐 없는 텍스트를 얻음&lt;br /&gt;이후 &lt;code class=&quot;code&quot;&gt;trafilatura&lt;/code&gt; 등으로 본문 추출이 가능&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;Requests: 빠르고 안정적인 HTML 수집 도구&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;requests는 가장 널리 사용되는 Python의 HTTP 라이브러리로, 웹 서버로부터 HTML을 빠르게 받아올 수 있다. 특히 정적인 HTML로 구성된 뉴스 기사를 수집할 때 매우 효율적이다.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Requests 사용 시 주요 장점&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;1. 인코딩을 직접 지정해서 문자가 깨지는 문제를 막을 수 있음&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;아래의 예시를 보면 서버는 분명히 &lt;span style=&quot;color: #006dd7;&quot;&gt;charset=EUC-KR&lt;/span&gt;로 보냈는데, 제목은 깨져서 나왔다. 이는 html을 받은 후 서버가 보낸 charset을 제대로 변환하지 못했기 때문이다. trafilatura를 사용하면 이 부분을 바로잡아 줄 수 없지만, requests를 사용하면 이 부분을 제대로 잡아 줄 기회가 있다.&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1748915589986&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;        &amp;lt;title&amp;gt; ̱        پ  &amp;micro;   ѱ     &quot;  ġ  ׸  ָ   ⷷ&quot;    ̵           -  Ӵ       &amp;lt;/title&amp;gt;
        &amp;lt;meta http-equiv=&quot;content-type&quot; content=&quot;text/html; charset=EUC-KR&quot;&amp;gt;&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;2. 가볍고 빠른 요청 처리&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;브라우저를 사용하지 않고 순수 HTML 요청만 처리하기 때문에 속도가 빠르고 리소스 사용량도 적다. 이전 글에서도 언급했듯, 우리가 수집하는 한글 뉴스의 약 80%는 정적 페이지로 만들어져 있다. 따라서, requests와 문자열 보정 함수를 거치더라도, 전체적인 수집 속도는 여전히 빠르게 유지될 수 있다.&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;2890&quot; data-end=&quot;2913&quot;&gt;Requests + Trafilatura의 기본 사용법&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;다음과 같은 간단한 코드로 쉽게 두 기능을 결합할 수 있다. 아래는 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;requests로 html을 받아와서 trafilatura에 넘겨주는&lt;/b&gt;&lt;/span&gt; 기본 코드이다. 여기서는 requests가 인코딩을 추정하는 기본 기능인&amp;nbsp; &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;apparent_encoding&lt;/span&gt;&lt;/b&gt;을 적용하였다.&amp;nbsp;&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import requests
import trafilatura
from trafilatura.metadata import extract_metadata

# 대상 URL 설정
url = &quot;https://news.mt.co.kr/mtview.php?no=2025050210033043266&amp;amp;VMK&quot;

# HTML 다운로드 (requests 사용)
response = requests.get(url, timeout=10)
response.encoding = response.apparent_encoding  # 인코딩 자동 감지
downloaded = response.text

# 메타데이터 및 본문 텍스트 추출
metadata = extract_metadata(downloaded)
text = trafilatura.extract(downloaded, output_format='txt', include_comments=False, favor_precision=True)

# 결과 출력
print(f&quot;  제목: {metadata.title}&quot;)
print(f&quot;  날짜: {metadata.date}&quot;)
print(f&quot;  본문:\n{text}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만, apparent_encoding으로는 여전히 제대로 인식이 되지 않는 웹사이트가 존재하여, 아래와 같이 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;별도의 문자열 인식 함수&lt;/span&gt;&lt;/b&gt;를 추가하였다. 이 함수를 간단히 설명하면, &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt; response.encoding &amp;rarr; response.apparent_encoding &lt;span style=&quot;color: #006dd7;&quot;&gt; &amp;rarr;&lt;/span&gt; 'utf-8' &lt;span style=&quot;color: #006dd7;&quot;&gt; &amp;rarr;&lt;/span&gt; 'euc-kr' &lt;span style=&quot;color: #006dd7;&quot;&gt; &amp;rarr;&lt;/span&gt; 'cp949'&lt;/span&gt;&lt;/b&gt;을 순차적으로 테스트 해보면서 한글이 많이 나오는 인코딩 방식을 선정하는 함수다. &lt;/p&gt;
&lt;pre id=&quot;code_1748916494008&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;def guess_best_decode(data: bytes, encodings: List[str]) -&amp;gt; str:
    &quot;&quot;&quot;Pick the decoding that yields the most Hangul characters.&quot;&quot;&quot;
    best_text: Optional[str] = None
    best_score = -1
    for enc in encodings:
        if not enc:
            continue
        try:
            text = data.decode(enc, errors=&quot;replace&quot;)
        except LookupError:
            continue
        # Score by Hangul character count
        score = sum(0xAC00 &amp;lt;= ord(ch) &amp;lt;= 0xD7A3 for ch in text)
        if score &amp;gt; best_score:
            best_text, best_score = text, score
        # Early exit if score is very high (heuristic)
        if score &amp;gt; 10:
            break
    if best_text is None:
        best_text = data.decode(&quot;utf-8&quot;, errors=&quot;replace&quot;)
    return best_text
    
def fetch_html_with_requests(url: str, timeout: int = 10) -&amp;gt; dict:
    &quot;&quot;&quot;
    requests로 HTML만 다운로드하는 함수 (본문 추출은 별도로 수행)
    guess_best_decode를 활용한 강력한 인코딩 자동 보정
    &quot;&quot;&quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'requests_only'
    }

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        # ✅ guess_best_decode를 통해 인코딩 복원
        candidates = [
        response.encoding,           # 서버가 응답 헤더에 명시한 인코딩
        response.apparent_encoding,  # requests가 추정한 인코딩
        'utf-8',                     # 가장 일반적인 기본값
        'euc-kr',                    # 한국 뉴스에서 흔히 사용됨
        'cp949'                      # euc-kr의 superset, Windows 환경에서 자주 사용
        ]

        result['html'] = guess_best_decode(response.content, candidates)
        result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-end=&quot;2913&quot; data-start=&quot;2890&quot; data-ke-size=&quot;size26&quot;&gt;Trafilatura - Requests 조합의 성능을 평가해 보자&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;성능 평가를 위해, 앞의 글에서 사용한 500개 URL이 담긴 파일을 그대로 사용했다. 성능 평가를 위한 최종 코드를 아래와 같이 작성하였다.&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1748864199361&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;티스토리 코드 블록&amp;lt;/title&amp;gt;
    &amp;lt;!-- ★ 스타일 --&amp;gt;
    &amp;lt;style&amp;gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    /* Outer card of the collapsible code box. Every declaration must end with
       a semicolon: without one after box-shadow, the parser merges it with the
       following width declaration and discards both. */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ reduced top/bottom margin */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: lift the width limit ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&amp;lt;/style&amp;gt;


&amp;lt;/head&amp;gt;

&amp;lt;!-- ★ 코드 박스 시작 --&amp;gt;
&amp;lt;div class=&amp;quot;code-box&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-hd&amp;quot;&amp;gt;
      URL Batch Processor (Trafilatura + requests)
    &amp;lt;span class=&amp;quot;toggle-btn&amp;quot;&amp;gt;  펼치기&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-ct&amp;quot;&amp;gt;
&amp;lt;pre&amp;gt;&amp;lt;code class=&amp;quot;language-python&amp;quot;&amp;gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import requests
import time
from datetime import datetime
import csv
from typing import List, Optional


def guess_best_decode(data: bytes, encodings: List[str]) -&amp;gt; str:
    &amp;quot;&amp;quot;&amp;quot;Pick the decoding that yields the most Hangul characters.&amp;quot;&amp;quot;&amp;quot;
    best_text: Optional[str] = None
    best_score = -1
    for enc in encodings:
        if not enc:
            continue
        try:
            text = data.decode(enc, errors=&amp;quot;replace&amp;quot;)
        except LookupError:
            continue
        # Score by Hangul character count
        score = sum(0xAC00 &amp;lt;= ord(ch) &amp;lt;= 0xD7A3 for ch in text)
        if score &amp;gt; best_score:
            best_text, best_score = text, score
        # Early exit if score is very high (heuristic)
        if score &amp;gt; 10:
            break
    if best_text is None:
        best_text = data.decode(&amp;quot;utf-8&amp;quot;, errors=&amp;quot;replace&amp;quot;)
    return best_text

def fetch_html_with_requests(url: str, timeout: int = 10) -&amp;gt; dict:
    &amp;quot;&amp;quot;&amp;quot;
    requests로 HTML만 다운로드하는 함수 (본문 추출은 별도로 수행)
    guess_best_decode를 활용한 강력한 인코딩 자동 보정
    &amp;quot;&amp;quot;&amp;quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'requests_only'
    }

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # ✅ guess_best_decode를 통해 인코딩 복원
        candidates = [
            response.encoding,
            response.apparent_encoding,
            'utf-8',
            'euc-kr',
            'cp949'
        ]
        result['html'] = guess_best_decode(response.content, candidates)
        result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result

def process_single_url(url, index, total_urls):
    &amp;quot;&amp;quot;&amp;quot;단일 URL 처리 함수&amp;quot;&amp;quot;&amp;quot;
    start_time = time.time()
    result = {
        &amp;quot;index&amp;quot;: index,
        &amp;quot;url&amp;quot;: url,
        &amp;quot;status&amp;quot;: &amp;quot;failed&amp;quot;,
        &amp;quot;title&amp;quot;: None,
        &amp;quot;date&amp;quot;: None,
        &amp;quot;content&amp;quot;: None,
        &amp;quot;processing_time&amp;quot;: 0,
        &amp;quot;error_message&amp;quot;: None,
    }
    
    try:
        print(f&amp;quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&amp;quot;)
        
        # HTML 다운로드
        html_result = fetch_html_with_requests(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&amp;quot;txt&amp;quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &amp;quot;status&amp;quot;: &amp;quot;success&amp;quot;,
                    &amp;quot;title&amp;quot;: metadata.title if metadata and metadata.title else &amp;quot;No title&amp;quot;,
                    &amp;quot;date&amp;quot;: str(metadata.date) if metadata and metadata.date else &amp;quot;No date&amp;quot;,
                    &amp;quot;content&amp;quot;: text[:200] + &amp;quot;...&amp;quot; if len(text) &amp;gt; 200 else text,
                })
            else:
                result[&amp;quot;error_message&amp;quot;] = &amp;quot;Empty content extracted&amp;quot;
        else:
            result[&amp;quot;error_message&amp;quot;] = f&amp;quot;Failed to download page: {html_result['error']}&amp;quot;
            
    except Exception as e:
        result[&amp;quot;error_message&amp;quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&amp;quot;processing_time&amp;quot;] = processing_time
    
    return result


def save_results_to_csv(results, output_file=None):
    &amp;quot;&amp;quot;&amp;quot;결과를 CSV 파일로 저장&amp;quot;&amp;quot;&amp;quot;

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&amp;quot;%Y%m%d_%H%M%S&amp;quot;)
        output_file = f&amp;quot;url_processing_results_{timestamp}.csv&amp;quot;

    try:
        with open(output_file, &amp;quot;w&amp;quot;, newline=&amp;quot;&amp;quot;, encoding=&amp;quot;utf-8&amp;quot;) as csvfile:
            fieldnames = [
                &amp;quot;index&amp;quot;,
                &amp;quot;url&amp;quot;,
                &amp;quot;status&amp;quot;,
                &amp;quot;title&amp;quot;,
                &amp;quot;date&amp;quot;,
                &amp;quot;content&amp;quot;,
                &amp;quot;processing_time&amp;quot;,
                &amp;quot;error_message&amp;quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&amp;quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&amp;quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&amp;quot;❌ 결과 저장 실패: {e}&amp;quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &amp;quot;&amp;quot;&amp;quot;처리 결과 요약 출력&amp;quot;&amp;quot;&amp;quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&amp;quot;processing_time&amp;quot;] for r in results]

    print(&amp;quot;\n&amp;quot; + &amp;quot;=&amp;quot; * 7 + &amp;quot; SUMMARY &amp;quot; + &amp;quot;=&amp;quot; * 7)
    print(f&amp;quot;Total URLs processed: {total_urls}&amp;quot;)
    print(&amp;quot;Workers used: 1&amp;quot;)  # 단일 스레드 처리
    print(f&amp;quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&amp;quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&amp;quot;\n&amp;quot; + &amp;quot;-&amp;quot; * 5 + &amp;quot; TIMING INFORMATION &amp;quot; + &amp;quot;-&amp;quot; * 5)
        print(f&amp;quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&amp;quot;)
        print(f&amp;quot;Average processing time per URL: {avg_time:.2f} seconds&amp;quot;)
        print(f&amp;quot;Fastest URL processing time: {min(processing_times):.2f} seconds&amp;quot;)
        print(f&amp;quot;Slowest URL processing time: {max(processing_times):.2f} seconds&amp;quot;)

    print(&amp;quot;\nProcess completed successfully. Results saved to CSV file.&amp;quot;)


def process_urls_from_csv(csv_file_path, url_column=&amp;quot;decoded_url&amp;quot;):
    &amp;quot;&amp;quot;&amp;quot;CSV 파일에서 URL들을 읽어서 순차 처리&amp;quot;&amp;quot;&amp;quot;

    print(&amp;quot;=&amp;quot; * 50)
    print(&amp;quot;  URL 배치 처리 시작&amp;quot;)
    print(&amp;quot;=&amp;quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&amp;quot;  CSV 파일 컬럼들: {list(df.columns)}&amp;quot;)
        print(f&amp;quot;  총 행 수: {len(df)}&amp;quot;)

        if url_column not in df.columns:
            raise ValueError(f&amp;quot;Column '{url_column}' not found in CSV file&amp;quot;)

        print(f&amp;quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&amp;quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&amp;quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&amp;quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &amp;quot;news.google.com&amp;quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&amp;quot;  전체 URL: {len(all_urls)}개&amp;quot;)
        print(f&amp;quot;  처리 대상 URL (decoded URLs): {total_urls}개&amp;quot;)
        print(f&amp;quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&amp;quot;)

        if urls:
            print(f&amp;quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&amp;quot;)

        print(&amp;quot;-&amp;quot; * 30)

    except Exception as e:
        print(f&amp;quot;❌ CSV 파일 읽기 실패: {e}&amp;quot;)
        return None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&amp;quot;진행: {i + 1}/{total_urls} &amp;quot;
                f&amp;quot;({(i + 1)/total_urls*100:.1f}%) &amp;quot;
                f&amp;quot;성공: {successful_count}, 실패: {failed_count}&amp;quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results)
    return results, saved_file


def main():
    csv_file_path = r&amp;quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&amp;quot;
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=&amp;quot;decoded_url&amp;quot;)

    if results:
        print(f&amp;quot;\n  저장된 파일: {saved_file}&amp;quot;)

        print(&amp;quot;\n  처리 결과 샘플:&amp;quot;)
        for i, result in enumerate(results[:3]):
            print(f&amp;quot;\n[{i+1}] {result['url'][:50]}...&amp;quot;)
            print(f&amp;quot;    상태: {result['status']}&amp;quot;)
            print(f&amp;quot;    제목: {result['title']}&amp;quot;)
            print(f&amp;quot;    처리시간: {result['processing_time']:.2f}초&amp;quot;)
            if result[&amp;quot;status&amp;quot;] == &amp;quot;failed&amp;quot;:
                print(f&amp;quot;    오류: {result['error_message']}&amp;quot;)


if __name__ == &amp;quot;__main__&amp;quot;:
    main()
&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-act&amp;quot;&amp;gt;
    &amp;lt;button class=&amp;quot;copy-btn&amp;quot;&amp;gt;  코드 복사&amp;lt;/button&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;!-- ★ 코드 박스 끝 --&amp;gt;

&amp;lt;!-- ★ 토글 &amp;amp; 복사 스크립트 --&amp;gt;
&amp;lt;script&amp;gt;
document.addEventListener(&amp;quot;DOMContentLoaded&amp;quot;, () =&amp;gt; {

  /* 토글 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;toggle-btn&amp;quot;)) return;
    const box = e.target.closest(&amp;quot;.code-box&amp;quot;);
    const cont = box.querySelector(&amp;quot;.code-ct&amp;quot;);
    const act = box.querySelector(&amp;quot;.code-act&amp;quot;);
    const open = cont.style.display === &amp;quot;block&amp;quot;;
    cont.style.display = act.style.display = open ? &amp;quot;none&amp;quot; : &amp;quot;block&amp;quot;;
    e.target.textContent = open ? &amp;quot;  펼치기&amp;quot; : &amp;quot;  접기&amp;quot;;
  });

  /* 복사 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;copy-btn&amp;quot;)) return;
    const btn = e.target;
    const code = btn.closest(&amp;quot;.code-box&amp;quot;).querySelector(&amp;quot;code&amp;quot;).innerText;
    if (navigator.clipboard &amp;amp;&amp;amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&amp;gt; flash(btn))
                                         .catch(() =&amp;gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&amp;gt; {
    const orig = btn.textContent;
    btn.textContent = &amp;quot;✅ 복사됨!&amp;quot;;
    setTimeout(() =&amp;gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&amp;gt; {
    const ta = document.createElement(&amp;quot;textarea&amp;quot;);
    ta.value = text;
    ta.style.position = &amp;quot;fixed&amp;quot;;
    ta.style.top = &amp;quot;-1000px&amp;quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&amp;quot;copy&amp;quot;);
      flash(btn);
    } catch {
      alert(&amp;quot;복사 실패   &amp;ndash; 브라우저가 클립보드를 차단했습니다.&amp;quot;);
    }
    document.body.removeChild(ta);
  };
});
&amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;&lt;!-- ★ 스타일 --&gt;
&lt;style&gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    /* Outer card of the collapsible code box. Every declaration must end with
       a semicolon: without one after box-shadow, the parser merges it with the
       following width declaration and discards both. */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ reduced top/bottom margin */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: lift the width limit ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&lt;/style&gt;
&lt;!-- ★ 코드 박스 시작 --&gt;
&lt;div class=&quot;code-box&quot;&gt;
&lt;div class=&quot;code-hd&quot;&gt;  URL Batch Processor (Trafilatura + requests) &lt;span class=&quot;toggle-btn&quot;&gt;  펼치기&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-ct&quot;&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import requests
import time
from datetime import datetime
import csv
from typing import List, Optional


def guess_best_decode(data: bytes, encodings: List[str]) -&amp;gt; str:
    &quot;&quot;&quot;Pick the decoding that yields the most Hangul characters.&quot;&quot;&quot;
    best_text: Optional[str] = None
    best_score = -1
    for enc in encodings:
        if not enc:
            continue
        try:
            text = data.decode(enc, errors=&quot;replace&quot;)
        except LookupError:
            continue
        # Score by Hangul character count
        score = sum(0xAC00 &amp;lt;= ord(ch) &amp;lt;= 0xD7A3 for ch in text)
        if score &amp;gt; best_score:
            best_text, best_score = text, score
        # Early exit if score is very high (heuristic)
        if score &amp;gt; 10:
            break
    if best_text is None:
        best_text = data.decode(&quot;utf-8&quot;, errors=&quot;replace&quot;)
    return best_text

def fetch_html_with_requests(url: str, timeout: int = 10) -&amp;gt; dict:
    &quot;&quot;&quot;
    requests로 HTML만 다운로드하는 함수 (본문 추출은 별도로 수행)
    guess_best_decode를 활용한 강력한 인코딩 자동 보정
    &quot;&quot;&quot;
    result = {
        'url': url,
        'html': '',
        'success': False,
        'error': None,
        'method_used': 'requests_only'
    }

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # ✅ guess_best_decode를 통해 인코딩 복원
        candidates = [
            response.encoding,
            response.apparent_encoding,
            'utf-8',
            'euc-kr',
            'cp949'
        ]
        result['html'] = guess_best_decode(response.content, candidates)
        result['success'] = True

    except Exception as e:
        result['error'] = str(e)

    return result

def process_single_url(url, index, total_urls):
    &quot;&quot;&quot;단일 URL 처리 함수&quot;&quot;&quot;
    start_time = time.time()
    result = {
        &quot;index&quot;: index,
        &quot;url&quot;: url,
        &quot;status&quot;: &quot;failed&quot;,
        &quot;title&quot;: None,
        &quot;date&quot;: None,
        &quot;content&quot;: None,
        &quot;processing_time&quot;: 0,
        &quot;error_message&quot;: None,
    }
    
    try:
        print(f&quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&quot;)
        
        # HTML 다운로드
        html_result = fetch_html_with_requests(url)
        
        if html_result['success']:
            html_content = html_result['html']
            
            # trafilatura로 메타데이터 추출
            metadata = trafilatura.extract_metadata(html_content)
            
            # trafilatura로 본문 추출
            text = trafilatura.extract(
                html_content,
                output_format=&quot;txt&quot;,
                include_comments=False,
                favor_precision=True,
            )
            
            if text and len(text.strip()) &amp;gt; 0:
                result.update({
                    &quot;status&quot;: &quot;success&quot;,
                    &quot;title&quot;: metadata.title if metadata and metadata.title else &quot;No title&quot;,
                    &quot;date&quot;: str(metadata.date) if metadata and metadata.date else &quot;No date&quot;,
                    &quot;content&quot;: text[:200] + &quot;...&quot; if len(text) &amp;gt; 200 else text,
                })
            else:
                result[&quot;error_message&quot;] = &quot;Empty content extracted&quot;
        else:
            result[&quot;error_message&quot;] = f&quot;Failed to download page: {html_result['error']}&quot;
            
    except Exception as e:
        result[&quot;error_message&quot;] = str(e)
    
    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&quot;processing_time&quot;] = processing_time
    
    return result


def save_results_to_csv(results, output_file=None):
    &quot;&quot;&quot;결과를 CSV 파일로 저장&quot;&quot;&quot;

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&quot;%Y%m%d_%H%M%S&quot;)
        output_file = f&quot;url_processing_results_{timestamp}.csv&quot;

    try:
        with open(output_file, &quot;w&quot;, newline=&quot;&quot;, encoding=&quot;utf-8&quot;) as csvfile:
            fieldnames = [
                &quot;index&quot;,
                &quot;url&quot;,
                &quot;status&quot;,
                &quot;title&quot;,
                &quot;date&quot;,
                &quot;content&quot;,
                &quot;processing_time&quot;,
                &quot;error_message&quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&quot;❌ 결과 저장 실패: {e}&quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &quot;&quot;&quot;처리 결과 요약 출력&quot;&quot;&quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&quot;status&quot;] == &quot;success&quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&quot;processing_time&quot;] for r in results]

    print(&quot;\n&quot; + &quot;=&quot; * 7 + &quot; SUMMARY &quot; + &quot;=&quot; * 7)
    print(f&quot;Total URLs processed: {total_urls}&quot;)
    print(&quot;Workers used: 1&quot;)  # 단일 스레드 처리
    print(f&quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&quot;\n&quot; + &quot;-&quot; * 5 + &quot; TIMING INFORMATION &quot; + &quot;-&quot; * 5)
        print(f&quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&quot;)
        print(f&quot;Average processing time per URL: {avg_time:.2f} seconds&quot;)
        print(f&quot;Fastest URL processing time: {min(processing_times):.2f} seconds&quot;)
        print(f&quot;Slowest URL processing time: {max(processing_times):.2f} seconds&quot;)

    print(&quot;\nProcess completed successfully. Results saved to CSV file.&quot;)


def process_urls_from_csv(csv_file_path, url_column=&quot;decoded_url&quot;):
    &quot;&quot;&quot;CSV 파일에서 URL들을 읽어서 순차 처리&quot;&quot;&quot;

    print(&quot;=&quot; * 50)
    print(&quot;  URL 배치 처리 시작&quot;)
    print(&quot;=&quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&quot;  CSV 파일 컬럼들: {list(df.columns)}&quot;)
        print(f&quot;  총 행 수: {len(df)}&quot;)

        if url_column not in df.columns:
            raise ValueError(f&quot;Column '{url_column}' not found in CSV file&quot;)

        print(f&quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &quot;news.google.com&quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&quot;  전체 URL: {len(all_urls)}개&quot;)
        print(f&quot;  처리 대상 URL (decoded URLs): {total_urls}개&quot;)
        print(f&quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&quot;)

        if urls:
            print(f&quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&quot;)

        print(&quot;-&quot; * 30)

    except Exception as e:
        print(f&quot;❌ CSV 파일 읽기 실패: {e}&quot;)
        return None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&quot;status&quot;] == &quot;success&quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&quot;진행: {i + 1}/{total_urls} &quot;
                f&quot;({(i + 1)/total_urls*100:.1f}%) &quot;
                f&quot;성공: {successful_count}, 실패: {failed_count}&quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results)
    return results, saved_file


def main():
    csv_file_path = r&quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&quot;
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=&quot;decoded_url&quot;)

    if results:
        print(f&quot;\n  저장된 파일: {saved_file}&quot;)

        print(&quot;\n  처리 결과 샘플:&quot;)
        for i, result in enumerate(results[:3]):
            print(f&quot;\n[{i+1}] {result['url'][:50]}...&quot;)
            print(f&quot;    상태: {result['status']}&quot;)
            print(f&quot;    제목: {result['title']}&quot;)
            print(f&quot;    처리시간: {result['processing_time']:.2f}초&quot;)
            if result[&quot;status&quot;] == &quot;failed&quot;:
                print(f&quot;    오류: {result['error_message']}&quot;)


if __name__ == &quot;__main__&quot;:
    main()
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div class=&quot;code-act&quot;&gt;&lt;button class=&quot;copy-btn&quot;&gt;  코드 복사&lt;/button&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;!-- ★ 코드 박스 끝 --&gt; &lt;!-- ★ 토글 &amp; 복사 스크립트 --&gt;
&lt;script&gt;
document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {

  /* 토글 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;toggle-btn&quot;)) return;
    const box = e.target.closest(&quot;.code-box&quot;);
    const cont = box.querySelector(&quot;.code-ct&quot;);
    const act = box.querySelector(&quot;.code-act&quot;);
    const open = cont.style.display === &quot;block&quot;;
    cont.style.display = act.style.display = open ? &quot;none&quot; : &quot;block&quot;;
    e.target.textContent = open ? &quot;  펼치기&quot; : &quot;  접기&quot;;
  });

  /* 복사 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;copy-btn&quot;)) return;
    const btn = e.target;
    const code = btn.closest(&quot;.code-box&quot;).querySelector(&quot;code&quot;).innerText;
    if (navigator.clipboard &amp;&amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&gt; flash(btn))
                                         .catch(() =&gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&gt; {
    const orig = btn.textContent;
    btn.textContent = &quot;✅ 복사됨!&quot;;
    setTimeout(() =&gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&gt; {
    const ta = document.createElement(&quot;textarea&quot;);
    ta.value = text;
    ta.style.position = &quot;fixed&quot;;
    ta.style.top = &quot;-1000px&quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&quot;copy&quot;);
      flash(btn);
    } catch {
      alert(&quot;복사 실패   – 브라우저가 클립보드를 차단했습니다.&quot;);
    }
    document.body.removeChild(ta);
  };
});
&lt;/script&gt;
&lt;/div&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;pre id=&quot;code_1748611554912&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;진행: 500/500 (100.0%) 성공: 407, 실패: 93

======= SUMMARY =======
Total URLs processed: 500
Workers used: 1
Successfully decoded: 407 (81.4%)
Failed to decode: 93 (18.6%)
Skipped (Google News URLs): 0 (0.0%)

----- TIMING INFORMATION -----
Total processing time: 5:05.95
Average processing time per URL: 0.61 seconds
Fastest URL processing time: 0.06 seconds
Slowest URL processing time: 3.96 seconds

Process completed successfully. Results saved to CSV file.
✅ 결과가 'url_processing_results_20250531_004542.csv' 파일에 저장되었습니다.

  저장된 파일: url_processing_results_20250531_004542.csv&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  성능 평가 및 결과&lt;/h3&gt;
&lt;div id=&quot;code_1748918281454&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;trafilatura 사용 방식 비교&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 40px;
            background-color: white;
            color: #333;
            line-height: 1.6;
        }
        
        .container {
            max-width: 800px;
            margin: 0 auto;
        }
        
        h1 {
            text-align: center;
            color: #2c3e50;
            margin-bottom: 30px;
            font-weight: 600;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            font-size: 14px;
        }
        
        th {
            background-color: #34495e;
            color: white;
            padding: 15px 12px;
            text-align: center;
            font-weight: 500;
            border: 1px solid #2c3e50;
            font-size: 16px;
        }
        
        th:first-child {
            text-align: left;
        }
        
        td {
            padding: 12px;
            border: 1px solid #ddd;
            vertical-align: top;
        }
        
        tr:nth-child(even) {
            background-color: #f8f9fa;
        }
        
        tr:hover {
            background-color: #e8f4f8;
        }
        
        .item-column {
            font-weight: 600;
            background-color: #ecf0f1;
            width: 200px;
        }
        
        .method1 {
            background-color: #fff;
            text-align: center;
        }
        
        .method2 {
            background-color: #fff;
            text-align: center;
        }
        
        .advantage {
            color: #27ae60;
            font-weight: 500;
        }
        
        .disadvantage {
            color: #e74c3c;
            font-weight: 500;
        }
        
        .neutral {
            color: #7f8c8d;
            font-weight: 500;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div class=&amp;quot;container&amp;quot;&amp;gt;
        &amp;lt;table&amp;gt;
            &amp;lt;thead&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;th&amp;gt;항목&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;trafilatura&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;requests + trafilatura&amp;lt;/th&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/thead&amp;gt;
            &amp;lt;tbody&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;item-column&amp;quot;&amp;gt;문자 인코딩 처리&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method1&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;disadvantage&amp;quot;&amp;gt;제한적 (간헐적 오류)&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method2&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;advantage&amp;quot;&amp;gt;완전 제어 가능&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;item-column&amp;quot;&amp;gt;처리 속도&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method1&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;advantage&amp;quot;&amp;gt;빠름 (예: 0.52초)&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method2&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;neutral&amp;quot;&amp;gt;약간 느림 (예: 0.61초)&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;item-column&amp;quot;&amp;gt;설정 유연성&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method1&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;disadvantage&amp;quot;&amp;gt;낮음&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method2&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;advantage&amp;quot;&amp;gt;높음&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;item-column&amp;quot;&amp;gt;디버깅/오류 추적&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method1&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;disadvantage&amp;quot;&amp;gt;어려움&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;method2&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;advantage&amp;quot;&amp;gt;용이&amp;lt;/span&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/tbody&amp;gt;
        &amp;lt;/table&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 40px;
            background-color: white;
            color: #333;
            line-height: 1.6;
        }
        
        .container {
            max-width: 800px;
            margin: 0 auto;
        }
        
        h1 {
            text-align: center;
            color: #2c3e50;
            margin-bottom: 30px;
            font-weight: 600;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            font-size: 14px;
        }
        
        th {
            background-color: #34495e;
            color: white;
            padding: 15px 12px;
            text-align: center;
            font-weight: 500;
            border: 1px solid #2c3e50;
            font-size: 16px;
        }
        
        th:first-child {
            text-align: left;
        }
        
        td {
            padding: 12px;
            border: 1px solid #ddd;
            vertical-align: top;
        }
        
        tr:nth-child(even) {
            background-color: #f8f9fa;
        }
        
        tr:hover {
            background-color: #e8f4f8;
        }
        
        .item-column {
            font-weight: 600;
            background-color: #ecf0f1;
            width: 200px;
        }
        
        .method1 {
            background-color: #fff;
            text-align: center;
        }
        
        .method2 {
            background-color: #fff;
            text-align: center;
        }
        
        .advantage {
            color: #27ae60;
            font-weight: 500;
        }
        
        .disadvantage {
            color: #e74c3c;
            font-weight: 500;
        }
        
        .neutral {
            color: #7f8c8d;
            font-weight: 500;
        }
    &lt;/style&gt;
&lt;div class=&quot;container&quot;&gt;
&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;항목&lt;/th&gt;
&lt;th&gt;trafilatura&lt;/th&gt;
&lt;th&gt;requests + trafilatura&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;item-column&quot;&gt;문자 인코딩 처리&lt;/td&gt;
&lt;td class=&quot;method1&quot;&gt;&lt;span class=&quot;disadvantage&quot;&gt;제한적 (간헐적 오류)&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;method2&quot;&gt;&lt;span class=&quot;advantage&quot;&gt;완전 제어 가능&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;item-column&quot;&gt;처리 속도&lt;/td&gt;
&lt;td class=&quot;method1&quot;&gt;&lt;span class=&quot;advantage&quot;&gt;빠름 (예: 0.52초)&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;method2&quot;&gt;&lt;span class=&quot;neutral&quot;&gt;약간 느림 (예: 0.61초)&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;item-column&quot;&gt;설정 유연성&lt;/td&gt;
&lt;td class=&quot;method1&quot;&gt;&lt;span class=&quot;disadvantage&quot;&gt;낮음&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;method2&quot;&gt;&lt;span class=&quot;advantage&quot;&gt;높음&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;item-column&quot;&gt;디버깅/오류 추적&lt;/td&gt;
&lt;td class=&quot;method1&quot;&gt;&lt;span class=&quot;disadvantage&quot;&gt;어려움&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;method2&quot;&gt;&lt;span class=&quot;advantage&quot;&gt;용이&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;마무리&lt;/h2&gt;
&lt;p data-end=&quot;245&quot; data-start=&quot;101&quot; data-ke-size=&quot;size16&quot;&gt;이번 전략은 뉴스 수집의 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;일관성과 품질&lt;/b&gt;&lt;/span&gt;을 크게 향상시킨다.일관성이 중요한 이유는, 추후 문제가 발견되었을 때 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;수정과 재수집에 소요되는 비용을 줄일 수 있기 때문&lt;/b&gt;&lt;/span&gt;이다. 수집 대상 URL의 80% 이상이 정적 HTML이라면, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;requests와 trafilatura 조합&lt;/b&gt;&lt;/span&gt;이 지금까지 시도한 방법 중 가장 실용적이다.&lt;/p&gt;
&lt;p data-end=&quot;245&quot; data-start=&quot;101&quot; data-ke-size=&quot;size16&quot;&gt;다음 글에서는 이 전략을 더욱 발전시켜, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;병렬 처리를 적용하여 처리 속도를 더욱 단축&lt;/b&gt;&lt;/span&gt;하는 방법을 시도하겠다.&lt;/p&gt;</description>
      <category>Requests</category>
      <category>trafilatura</category>
      <category>기사수집자동화</category>
      <category>한글깨짐</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/20</guid>
      <comments>https://catalystmind.tistory.com/20#entry20comment</comments>
      <pubDate>Tue, 3 Jun 2025 22:14:22 +0900</pubDate>
    </item>
    <item>
      <title>Trafilatura를 이용한 뉴스 스크래핑</title>
      <link>https://catalystmind.tistory.com/19</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1748606386595&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;TL;DR 섹션&amp;lt;/title&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body style=&amp;quot;font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #333; margin: 20px; background-color: #f8fafc;&amp;quot;&amp;gt;

&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
    &amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
        &amp;lt;p style=&amp;quot;margin: 0 0 16px 0; font-weight: 500; color: #1e40af;&amp;quot;&amp;gt;
            &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 700;&amp;quot;&amp;gt;Trafilatura&amp;lt;/span&amp;gt;: 
            &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 700;&amp;quot;&amp;gt;웹페이지에서 핵심 본문만 자동 추출하는 파이썬 라이브러리&amp;lt;/span&amp;gt;
        &amp;lt;/p&amp;gt;
        
        &amp;lt;div style=&amp;quot;margin: 16px 0;&amp;quot;&amp;gt;
            &amp;lt;h3 style=&amp;quot;color: #1e40af; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&amp;quot;&amp;gt;✅ 주요 장점&amp;lt;/h3&amp;gt;
            &amp;lt;p style=&amp;quot;margin: 0; color: #374151;&amp;quot;&amp;gt;
                웹사이트에서 광고 및 불필요한 내용 제거하고 본문만 추출, 
                &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;웹사이트마다 본문의 구조가 다른 점을 해결&amp;lt;/span&amp;gt;
            &amp;lt;/p&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div style=&amp;quot;margin: 16px 0;&amp;quot;&amp;gt;
            &amp;lt;h3 style=&amp;quot;color: #1e40af; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&amp;quot;&amp;gt;  성능 평가 결과&amp;lt;/h3&amp;gt;
            &amp;lt;p style=&amp;quot;margin: 0; color: #374151;&amp;quot;&amp;gt;
                500개 URL 테스트에서 
                &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;성공률 81.4%&amp;lt;/span&amp;gt;, 
                &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;평균 처리시간 0.52초&amp;lt;/span&amp;gt; 달성
            &amp;lt;/p&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div style=&amp;quot;margin: 16px 0;&amp;quot;&amp;gt;
            &amp;lt;h3 style=&amp;quot;color: #dc2626; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&amp;quot;&amp;gt;⚠️ 주요 한계&amp;lt;/h3&amp;gt;
            &amp;lt;p style=&amp;quot;margin: 0; color: #374151;&amp;quot;&amp;gt;
                &amp;lt;span style=&amp;quot;color: #dc2626; font-weight: 600;&amp;quot;&amp;gt;자바스크립트 렌더링&amp;lt;/span&amp;gt;, 
                &amp;lt;span style=&amp;quot;color: #dc2626; font-weight: 600;&amp;quot;&amp;gt;한글 인코딩 오류&amp;lt;/span&amp;gt; 시 본문 추출 실패
            &amp;lt;/p&amp;gt;
        &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;p style=&quot;margin: 0 0 16px 0; font-weight: 500; color: #1e40af;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: bold;&quot;&gt;Trafilatura&lt;/span&gt;: &lt;span style=&quot;color: #1e40af; font-weight: bold;&quot;&gt;웹페이지에서 핵심 본문만 자동 추출하는 파이썬 라이브러리&lt;/span&gt;&lt;/p&gt;
&lt;div style=&quot;margin: 16px 0;&quot;&gt;
&lt;h3 style=&quot;color: #1e40af; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&quot; data-ke-size=&quot;size23&quot;&gt;✅ 주요 장점&lt;/h3&gt;
&lt;p style=&quot;margin: 0; color: #374151;&quot; data-ke-size=&quot;size16&quot;&gt;웹사이트에서 광고 및 불필요한 내용 제거하고 본문만 추출, &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;웹사이트마다 본문의 구조가 다른 점을 해결&lt;/span&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;div style=&quot;margin: 16px 0;&quot;&gt;
&lt;h3 style=&quot;color: #1e40af; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&quot; data-ke-size=&quot;size23&quot;&gt;  성능 평가 결과&lt;/h3&gt;
&lt;p style=&quot;margin: 0; color: #374151;&quot; data-ke-size=&quot;size16&quot;&gt;500개 URL 테스트에서 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;성공률 81.4%&lt;/span&gt;, &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;평균 처리시간 0.52초&lt;/span&gt; 달성&lt;/p&gt;
&lt;/div&gt;
&lt;div style=&quot;margin: 16px 0;&quot;&gt;
&lt;h3 style=&quot;color: #dc2626; font-size: 16px; font-weight: 600; margin: 0 0 8px 0;&quot; data-ke-size=&quot;size23&quot;&gt;⚠️ 주요 한계&lt;/h3&gt;
&lt;p style=&quot;margin: 0; color: #374151;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #dc2626; font-weight: 600;&quot;&gt;자바스크립트 렌더링&lt;/span&gt;, &lt;span style=&quot;color: #dc2626; font-weight: 600;&quot;&gt;한글 인코딩 오류&lt;/span&gt; 시 본문 추출 실패&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h1&gt;Trafilatura로 시작하는 뉴스 자동화의 전환점&lt;/h1&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;뉴스 데이터는 투자, 여론 분석, 정보 탐색 등 다양한 목적으로 활용되는 핵심 정보다. 특히, &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;시간이 부족한 직장인 투자자&lt;/span&gt;&lt;/b&gt;에게 빠르게 뉴스와 시장의 흐름을 판단하기 위해서는 자동화가 필요한데, 자동으로 기사를 수집하고 정제하는 과정에는 많은 어려움이 있다.&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;⚠️ 기존 방식의 한계: Power Automate + GPT&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이전에 Microsoft Power Automate를 활용해 뉴스 기사를 수집하고, GPT를 이용해 본문을 정제하는 자동화 워크플로우를 구성했다. 기사의 웹싸이트에서 텍스트 추출까지는 성공했지만, 곧 다음과 같은 문제에 부딪혔다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;1. 정제 비용 문제:&lt;/b&gt;&lt;/span&gt; 웹사이트에서 본문뿐만 아니라 광고, 댓글, 관련 기사 링크 등 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;불필요한 정보까지 함께 수집&lt;/b&gt;&lt;/span&gt;되다 보니, GPT 후처리 시 토큰 낭비가 심각했다.&lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt; 불필요한 텍스트는 곧 비용으로 연결된다.&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;2. 유지보수 문제:&lt;/b&gt;&lt;/span&gt; 뉴스 사이트마다 HTML 구조가 달라 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;본문 위치를 정확히 찾는 작업을 반복해야 했고&lt;/b&gt;&lt;/span&gt;, 사이트가 조금만 바뀌어도 본문 추출 방법이 무력화된다.&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;Trafilatura: 웹에서 핵심 본문만 추출하는 파이썬 도구&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Trafilatura는 웹페이지에서 핵심 콘텐츠만 뽑아주는 파이썬 기반 라이브러리로, 뉴스, 블로그에서 본문을 추출하는데 특화되어 있어 위에서 언급한 문제점을 해결할 수 있는 가장 현실적인 방안이다.&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;Trafilatura의 주요 장점&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;1. 불필요한 요소 자동 제거&lt;/b&gt;&lt;/span&gt; 광고, 네비게이션 바, 댓글, 추천 기사 등 콘텐츠 이외 요소를 필터링해서 본문만 남겨준다. 이렇게 정제된 데이터는 GPT 같은 언어 모델에 넘기기에 최적화되어 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;2. 다양한 사이트 구조 대응&lt;/b&gt;&lt;/span&gt; 뉴스, 블로그 등 다양한 HTML 구조에 자동으로 적응한다. 별도 규칙 정의 없이도 대부분의 웹사이트에서 본문을 추출할 수 있어 유지보수 부담이 크게 줄어든다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1748258970306&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;티스토리 인용구 블록&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        .quote-block {
            border-left: 4px solid #e74c3c;
            padding: 15px 20px;
            margin: 20px 0;
            background-color: #f8f9fa;
            font-style: italic;
            color: #555;
            position: relative;
        }
        
        .quote-block p {
            margin: 0;
            line-height: 1.6;
            font-size: 1.2rem;
        }
        
        .quote-block * {
            font-size: 1.2rem;
        }
        

    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;!-- 티스토리에서 복사해서 사용할 부분 --&amp;gt;
    &amp;lt;div class=&amp;quot;quote-block&amp;quot;&amp;gt;
        &amp;lt;p&amp;gt;Trafilatura는 뉴스 데이터 정제 비용을 줄이고 유지보수를 간소화하는 핵심 도구다.&amp;lt;/p&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        .quote-block {
            border-left: 4px solid #e74c3c;
            padding: 15px 20px;
            margin: 20px 0;
            background-color: #f8f9fa;
            font-style: italic;
            color: #555;
            position: relative;
        }
        
        .quote-block p {
            margin: 0;
            line-height: 1.6;
            font-size: 1.2rem;
        }
        
        .quote-block * {
            font-size: 1.2rem;
        }
        

    &lt;/style&gt;
&lt;!-- 티스토리에서 복사해서 사용할 부분 --&gt;
&lt;div class=&quot;quote-block&quot;&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Trafilatura는 뉴스 데이터 정제 비용을 줄이고 유지보수를 간소화하는 핵심 도구다.&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-end=&quot;2913&quot; data-start=&quot;2890&quot; data-ke-size=&quot;size26&quot;&gt;Trafilatura의 기본 사용법&lt;/h2&gt;
&lt;p data-end=&quot;2946&quot; data-start=&quot;2915&quot; data-ke-size=&quot;size16&quot;&gt;다음과 같은 간단한 코드로 쉽게 시작할 수 있다.&lt;/p&gt;
&lt;pre id=&quot;code_1746961617951&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import trafilatura
from trafilatura.metadata import extract_metadata

# 대상 URL 설정
url = &quot;https://www.yna.co.kr/view/AKR20250511050400001?section=election2025/news&amp;amp;site=topnews01&quot;

# URL에서 HTML 다운로드
downloaded = trafilatura.fetch_url(url)

# 메타데이터 및 본문 텍스트 추출
metadata = extract_metadata(downloaded)
text = trafilatura.extract(downloaded, output_format='txt', include_comments=False, favor_precision=True)

# 결과 출력
print(f&quot;  제목: {metadata.title}&quot;)
print(f&quot;  날짜: {metadata.date}&quot;)
print(f&quot;  본문:\n{text}&quot;)&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1746961664809&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;  제목: 21대 대선에 7명 후보 등록&amp;hellip;이재명 1번&amp;middot;김문수 2번&amp;middot;이준석 4번 | 연합뉴스
  날짜: 2025-05-11
  본문:
제21대 대통령 선거에 총 7명의 후보가 등록한 것으로 11일 집계됐다.
이준석 후보, 구주와 후보, 송진호 후보는 군 복무를 마쳤다고 신고했다.
후보자 기호는 1번 더불어민주당 이재명, 2번 국민의힘 김문수, 4번 개혁신당 이준석, 5번 민주노동당 권영국, 6번 자유통일당 구주와, 7번 무소속 황교안, 8번 무소속 송진호 후보로 결정됐다.
전체 내용을 이...&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;2890&quot; data-end=&quot;2913&quot;&gt;Trafilatura의 성능을 평가해 보자&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Trafilatura의 성능을 평가하기 위해, 500개의 다양한 기사 원문 URL이 담긴 파일에서 URL을 불러와 처리 속도와 성공률을 측정하는 코드를 추가했다.&lt;/p&gt;
&lt;div id=&quot;code_1748260298016&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;티스토리 코드 블록&amp;lt;/title&amp;gt;
    &amp;lt;!-- ★ 스타일 --&amp;gt;
    &amp;lt;style&amp;gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ 위아래 여백 축소 */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: 폭 제한 풀기 ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&amp;lt;/style&amp;gt;


&amp;lt;/head&amp;gt;

&amp;lt;!-- ★ 코드 박스 시작 --&amp;gt;
&amp;lt;div class=&amp;quot;code-box&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-hd&amp;quot;&amp;gt;
      URL Batch Processor (Trafilatura)
    &amp;lt;span class=&amp;quot;toggle-btn&amp;quot;&amp;gt;  펼치기&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-ct&amp;quot;&amp;gt;
&amp;lt;pre&amp;gt;&amp;lt;code class=&amp;quot;language-python&amp;quot;&amp;gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import time
from datetime import datetime
import csv


def process_single_url(url, index, total_urls):
    &amp;quot;&amp;quot;&amp;quot;단일 URL 처리 함수&amp;quot;&amp;quot;&amp;quot;
    start_time = time.time()
    result = {
        &amp;quot;index&amp;quot;: index,
        &amp;quot;url&amp;quot;: url,
        &amp;quot;status&amp;quot;: &amp;quot;failed&amp;quot;,
        &amp;quot;title&amp;quot;: None,
        &amp;quot;date&amp;quot;: None,
        &amp;quot;content&amp;quot;: None,
        &amp;quot;processing_time&amp;quot;: 0,
        &amp;quot;error_message&amp;quot;: None,
    }

    try:
        print(f&amp;quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&amp;quot;)

        # URL 다운로드
        downloaded = trafilatura.fetch_url(url)

        if downloaded:
            # 메타데이터 추출
            metadata = extract_metadata(downloaded)

            # 본문 추출
            text = trafilatura.extract(
                downloaded,
                output_format=&amp;quot;txt&amp;quot;,
                include_comments=False,
                favor_precision=True,
            )

            if text and len(text.strip()) &amp;gt; 0:
                result.update(
                    {
                        &amp;quot;status&amp;quot;: &amp;quot;success&amp;quot;,
                        &amp;quot;title&amp;quot;: metadata.title if metadata else &amp;quot;No title&amp;quot;,
                        &amp;quot;date&amp;quot;: str(metadata.date) if metadata and metadata.date else &amp;quot;No date&amp;quot;,
                        &amp;quot;content&amp;quot;: text[:200] + &amp;quot;...&amp;quot; if len(text) &amp;gt; 200 else text,
                    }
                )
            else:
                result[&amp;quot;error_message&amp;quot;] = &amp;quot;Empty content extracted&amp;quot;
        else:
            result[&amp;quot;error_message&amp;quot;] = &amp;quot;Failed to download page&amp;quot;

    except Exception as e:
        result[&amp;quot;error_message&amp;quot;] = str(e)

    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&amp;quot;processing_time&amp;quot;] = processing_time

    return result


def save_results_to_csv(results, output_file=None):
    &amp;quot;&amp;quot;&amp;quot;결과를 CSV 파일로 저장&amp;quot;&amp;quot;&amp;quot;

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&amp;quot;%Y%m%d_%H%M%S&amp;quot;)
        output_file = f&amp;quot;url_processing_results_{timestamp}.csv&amp;quot;

    try:
        with open(output_file, &amp;quot;w&amp;quot;, newline=&amp;quot;&amp;quot;, encoding=&amp;quot;utf-8&amp;quot;) as csvfile:
            fieldnames = [
                &amp;quot;index&amp;quot;,
                &amp;quot;url&amp;quot;,
                &amp;quot;status&amp;quot;,
                &amp;quot;title&amp;quot;,
                &amp;quot;date&amp;quot;,
                &amp;quot;content&amp;quot;,
                &amp;quot;processing_time&amp;quot;,
                &amp;quot;error_message&amp;quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&amp;quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&amp;quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&amp;quot;❌ 결과 저장 실패: {e}&amp;quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &amp;quot;&amp;quot;&amp;quot;처리 결과 요약 출력&amp;quot;&amp;quot;&amp;quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&amp;quot;processing_time&amp;quot;] for r in results]

    print(&amp;quot;\n&amp;quot; + &amp;quot;=&amp;quot; * 7 + &amp;quot; SUMMARY &amp;quot; + &amp;quot;=&amp;quot; * 7)
    print(f&amp;quot;Total URLs processed: {total_urls}&amp;quot;)
    print(&amp;quot;Workers used: 1&amp;quot;)  # 단일 스레드 처리
    print(f&amp;quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;)
    print(f&amp;quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&amp;quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&amp;quot;\n&amp;quot; + &amp;quot;-&amp;quot; * 5 + &amp;quot; TIMING INFORMATION &amp;quot; + &amp;quot;-&amp;quot; * 5)
        print(f&amp;quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&amp;quot;)
        print(f&amp;quot;Average processing time per URL: {avg_time:.2f} seconds&amp;quot;)
        print(f&amp;quot;Fastest URL processing time: {min(processing_times):.2f} seconds&amp;quot;)
        print(f&amp;quot;Slowest URL processing time: {max(processing_times):.2f} seconds&amp;quot;)

    print(&amp;quot;\nProcess completed successfully. Results saved to CSV file.&amp;quot;)


def process_urls_from_csv(csv_file_path, url_column=&amp;quot;decoded_url&amp;quot;):
    &amp;quot;&amp;quot;&amp;quot;CSV 파일에서 URL들을 읽어서 순차 처리&amp;quot;&amp;quot;&amp;quot;

    print(&amp;quot;=&amp;quot; * 50)
    print(&amp;quot;  URL 배치 처리 시작&amp;quot;)
    print(&amp;quot;=&amp;quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&amp;quot;  CSV 파일 컬럼들: {list(df.columns)}&amp;quot;)
        print(f&amp;quot;  총 행 수: {len(df)}&amp;quot;)

        if url_column not in df.columns:
            raise ValueError(f&amp;quot;Column '{url_column}' not found in CSV file&amp;quot;)

        print(f&amp;quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&amp;quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&amp;quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&amp;quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &amp;quot;news.google.com&amp;quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&amp;quot;  전체 URL: {len(all_urls)}개&amp;quot;)
        print(f&amp;quot;  처리 대상 URL (decoded URLs): {total_urls}개&amp;quot;)
        print(f&amp;quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&amp;quot;)

        if urls:
            print(f&amp;quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&amp;quot;)

        print(&amp;quot;-&amp;quot; * 30)

    except Exception as e:
        print(f&amp;quot;❌ CSV 파일 읽기 실패: {e}&amp;quot;)
        return None, None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&amp;quot;status&amp;quot;] == &amp;quot;success&amp;quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&amp;quot;진행: {i + 1}/{total_urls} &amp;quot;
                f&amp;quot;({(i + 1)/total_urls*100:.1f}%) &amp;quot;
                f&amp;quot;성공: {successful_count}, 실패: {failed_count}&amp;quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results)
    return results, saved_file


def main():
    csv_file_path = r&amp;quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&amp;quot;
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=&amp;quot;decoded_url&amp;quot;)

    if results:
        print(f&amp;quot;\n  저장된 파일: {saved_file}&amp;quot;)

        print(&amp;quot;\n  처리 결과 샘플:&amp;quot;)
        for i, result in enumerate(results[:3]):
            print(f&amp;quot;\n[{i+1}] {result['url'][:50]}...&amp;quot;)
            print(f&amp;quot;    상태: {result['status']}&amp;quot;)
            print(f&amp;quot;    제목: {result['title']}&amp;quot;)
            print(f&amp;quot;    처리시간: {result['processing_time']:.2f}초&amp;quot;)
            if result[&amp;quot;status&amp;quot;] == &amp;quot;failed&amp;quot;:
                print(f&amp;quot;    오류: {result['error_message']}&amp;quot;)


if __name__ == &amp;quot;__main__&amp;quot;:
    main()
&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-act&amp;quot;&amp;gt;
    &amp;lt;button class=&amp;quot;copy-btn&amp;quot;&amp;gt;  코드 복사&amp;lt;/button&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;!-- ★ 코드 박스 끝 --&amp;gt;

&amp;lt;!-- ★ 토글 &amp;amp; 복사 스크립트 --&amp;gt;
&amp;lt;script&amp;gt;
document.addEventListener(&amp;quot;DOMContentLoaded&amp;quot;, () =&amp;gt; {

  /* 토글 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;toggle-btn&amp;quot;)) return;
    const box = e.target.closest(&amp;quot;.code-box&amp;quot;);
    const cont = box.querySelector(&amp;quot;.code-ct&amp;quot;);
    const act = box.querySelector(&amp;quot;.code-act&amp;quot;);
    const open = cont.style.display === &amp;quot;block&amp;quot;;
    cont.style.display = act.style.display = open ? &amp;quot;none&amp;quot; : &amp;quot;block&amp;quot;;
    e.target.textContent = open ? &amp;quot;  펼치기&amp;quot; : &amp;quot;  접기&amp;quot;;
  });

  /* 복사 */
  document.body.addEventListener(&amp;quot;click&amp;quot;, e =&amp;gt; {
    if (!e.target.classList.contains(&amp;quot;copy-btn&amp;quot;)) return;
    const btn = e.target;
    const code = btn.closest(&amp;quot;.code-box&amp;quot;).querySelector(&amp;quot;code&amp;quot;).innerText;
    if (navigator.clipboard &amp;amp;&amp;amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&amp;gt; flash(btn))
                                         .catch(() =&amp;gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&amp;gt; {
    const orig = btn.textContent;
    btn.textContent = &amp;quot;✅ 복사됨!&amp;quot;;
    setTimeout(() =&amp;gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&amp;gt; {
    const ta = document.createElement(&amp;quot;textarea&amp;quot;);
    ta.value = text;
    ta.style.position = &amp;quot;fixed&amp;quot;;
    ta.style.top = &amp;quot;-1000px&amp;quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&amp;quot;copy&amp;quot;);
      flash(btn);
    } catch {
      alert(&amp;quot;복사 실패   &amp;ndash; 브라우저가 클립보드를 차단했습니다.&amp;quot;);
    }
    document.body.removeChild(ta);
  };
});
&amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;
&quot;&gt;&lt;!-- ★ 스타일 --&gt;
&lt;style&gt;
    /* 티스토리 기본 코드 블록 스타일 - Carbon one-light 테마 적용 */
    .code-box{
      border:1px solid #e3e3e3;
      border-radius:8px;
      margin:20px 0; /* ✅ 위아래 여백 축소 */
      background:#ffffff;
      font-family:'Hack','D2Coding','Nanum Gothic Coding',monospace;
      overflow:hidden;
      box-shadow:0 2px 8px rgba(0,0,0,0.06);
      /* ── NEW: 폭 제한 풀기 ── */
      width:100% !important;
      max-width:none !important;
    }
    .code-hd{
      background:#3a4250;
      color:#fff;
      padding:14px 20px;
      font-weight:600;
      font-size:14px;
      display:flex;
      justify-content:space-between;
      align-items:center;
      cursor:pointer;      
      border-bottom: 1px solid #2e333f; /* ✅ 어두운 테두리 */
    }
    .code-hd:hover{
      background: #4b5563;  /* 약간 밝은 회색-파랑 계열 */
    }
    .toggle-btn {
      background: #6b7280;       /* 버튼 배경 (회색) */
      color: #fff;               /* 글자색 */
      font-size: 13px;
      padding: 4px 10px;
      border-radius: 5px;
      font-weight: normal;
      display: inline-block;
}


.code-ct {
  display: none;
  padding: 8px 0; /* ✅ 위아래 padding 줄임, 좌우 제거 */
  background: #ffffff;
  font-size: 14px;
  line-height: 1.3;
  overflow-x: auto;
  white-space: pre;
  font-family: 'Hack','D2Coding','Consolas','Monaco', monospace;
  color: #383a42;
  scrollbar-width: thin;
  scrollbar-color: #d0d0d0 #f5f5f5;
}

.code-ct::-webkit-scrollbar {
  height: 6px;
  background: #f5f5f5;
}
.code-ct::-webkit-scrollbar-track {
  background: #f5f5f5;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb {
  background: #d0d0d0;
  border-radius: 4px;
}
.code-ct::-webkit-scrollbar-thumb:hover {
  background: #b0b0b0;
}

.code-act {
  display: none;
  text-align: right;
  padding: 12px 20px; /* ✅ 복사 버튼 줄도 축소 */
  background: #fafafa;
  border-top: 1px solid #e3e3e3;
}
    .copy-btn{
      background:#50a14f;
      color:#fff;
      border:0;
      padding:8px 20px;
      border-radius:6px;
      font-size:14px;
      cursor:pointer;
      font-weight:500;
      transition:all 0.2s ease
    }
    .copy-btn:hover{
      background:#40a33f;
      transform:translateY(-1px);
      box-shadow:0 2px 4px rgba(0,0,0,0.1)
    }

    /* 파이썬 신택스 하이라이팅 - one-light 테마 */
    .keyword{color:#a626a4;font-weight:normal}
    .string{color:#50a14f}
    .comment{color:#a0a1a7;font-style:italic}
    .function{color:#4078f2}
    .number{color:#986801}
    .operator{color:#383a42}
    .builtin{color:#c18401}


&lt;/style&gt;
&lt;!-- ★ 코드 박스 시작 --&gt;
&lt;div class=&quot;code-box&quot;&gt;
&lt;div class=&quot;code-hd&quot;&gt;  URL Batch Processor (Trafilatura) &lt;span class=&quot;toggle-btn&quot;&gt;  펼치기&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-ct&quot;&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt;
import pandas as pd
import trafilatura
from trafilatura.metadata import extract_metadata
import time
from datetime import datetime
import csv


def process_single_url(url, index, total_urls):
    &quot;&quot;&quot;단일 URL 처리 함수&quot;&quot;&quot;
    start_time = time.time()
    result = {
        &quot;index&quot;: index,
        &quot;url&quot;: url,
        &quot;status&quot;: &quot;failed&quot;,
        &quot;title&quot;: None,
        &quot;date&quot;: None,
        &quot;content&quot;: None,
        &quot;processing_time&quot;: 0,
        &quot;error_message&quot;: None,
    }

    try:
        print(f&quot;Processing URL {index + 1}/{total_urls}: {url[:60]}...&quot;)

        # URL 다운로드
        downloaded = trafilatura.fetch_url(url)

        if downloaded:
            # 메타데이터 추출
            metadata = extract_metadata(downloaded)

            # 본문 추출
            text = trafilatura.extract(
                downloaded,
                output_format=&quot;txt&quot;,
                include_comments=False,
                favor_precision=True,
            )

            if text and len(text.strip()) &amp;gt; 0:
                result.update(
                    {
                        &quot;status&quot;: &quot;success&quot;,
                        &quot;title&quot;: metadata.title if metadata else &quot;No title&quot;,
                        &quot;date&quot;: str(metadata.date) if metadata and metadata.date else &quot;No date&quot;,
                        &quot;content&quot;: text[:200] + &quot;...&quot; if len(text) &amp;gt; 200 else text,
                    }
                )
            else:
                result[&quot;error_message&quot;] = &quot;Empty content extracted&quot;
        else:
            result[&quot;error_message&quot;] = &quot;Failed to download page&quot;

    except Exception as e:
        result[&quot;error_message&quot;] = str(e)

    # 처리 시간 계산
    processing_time = time.time() - start_time
    result[&quot;processing_time&quot;] = processing_time

    return result


def save_results_to_csv(results, output_file=None):
    &quot;&quot;&quot;결과를 CSV 파일로 저장&quot;&quot;&quot;

    # 파일명이 지정되지 않으면 현재 시간을 포함한 파일명 생성
    if output_file is None:
        timestamp = datetime.now().strftime(&quot;%Y%m%d_%H%M%S&quot;)
        output_file = f&quot;url_processing_results_{timestamp}.csv&quot;

    try:
        with open(output_file, &quot;w&quot;, newline=&quot;&quot;, encoding=&quot;utf-8&quot;) as csvfile:
            fieldnames = [
                &quot;index&quot;,
                &quot;url&quot;,
                &quot;status&quot;,
                &quot;title&quot;,
                &quot;date&quot;,
                &quot;content&quot;,
                &quot;processing_time&quot;,
                &quot;error_message&quot;,
            ]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            for result in results:
                writer.writerow(result)

        print(f&quot;✅ 결과가 '{output_file}' 파일에 저장되었습니다.&quot;)
        return output_file  # 실제 저장된 파일명 반환

    except Exception as e:
        print(f&quot;❌ 결과 저장 실패: {e}&quot;)
        return None


def print_summary(results, total_time, skipped_count=0):
    &quot;&quot;&quot;처리 결과 요약 출력&quot;&quot;&quot;
    total_urls = len(results)
    successful_count = sum(1 for r in results if r[&quot;status&quot;] == &quot;success&quot;)
    failed_count = total_urls - successful_count
    processing_times = [r[&quot;processing_time&quot;] for r in results]

    print(&quot;\n&quot; + &quot;=&quot; * 7 + &quot; SUMMARY &quot; + &quot;=&quot; * 7)
    print(f&quot;Total URLs processed: {total_urls}&quot;)
    print(&quot;Workers used: 1&quot;)  # 단일 스레드 처리
    print(f&quot;Successfully decoded: {successful_count} ({successful_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;)
    print(f&quot;Skipped (Google News URLs): {skipped_count} ({skipped_count/(total_urls + skipped_count)*100:.1f}%)&quot;)

    if processing_times:
        avg_time = sum(processing_times) / len(processing_times)

        print(&quot;\n&quot; + &quot;-&quot; * 5 + &quot; TIMING INFORMATION &quot; + &quot;-&quot; * 5)
        print(f&quot;Total processing time: {int(total_time//60)}:{total_time%60:05.2f}&quot;)
        print(f&quot;Average processing time per URL: {avg_time:.2f} seconds&quot;)
        print(f&quot;Fastest URL processing time: {min(processing_times):.2f} seconds&quot;)
        print(f&quot;Slowest URL processing time: {max(processing_times):.2f} seconds&quot;)

    print(&quot;\nProcess completed successfully. Results saved to CSV file.&quot;)


def process_urls_from_csv(csv_file_path, url_column=&quot;decoded_url&quot;):
    &quot;&quot;&quot;CSV 파일에서 URL들을 읽어서 순차 처리&quot;&quot;&quot;

    print(&quot;=&quot; * 50)
    print(&quot;  URL 배치 처리 시작&quot;)
    print(&quot;=&quot; * 50)

    # CSV 파일 읽기
    try:
        df = pd.read_csv(csv_file_path)
        print(f&quot;  CSV 파일 컬럼들: {list(df.columns)}&quot;)
        print(f&quot;  총 행 수: {len(df)}&quot;)

        if url_column not in df.columns:
            raise ValueError(f&quot;Column '{url_column}' not found in CSV file&quot;)

        print(f&quot;  '{url_column}' 컬럼의 NULL이 아닌 값 개수: {df[url_column].notna().sum()}&quot;)

        all_urls = df[url_column].dropna().tolist()
        print(f&quot;  첫 번째 URL 샘플: {all_urls[0] if all_urls else 'None'}&quot;)

        # news.google.com이 포함되지 않은 URL만 필터링
        urls = [url for url in all_urls if &quot;news.google.com&quot; not in str(url)]

        total_urls = len(urls)
        skipped_urls = len(all_urls) - total_urls

        print(f&quot;  전체 URL: {len(all_urls)}개&quot;)
        print(f&quot;  처리 대상 URL (decoded URLs): {total_urls}개&quot;)
        print(f&quot;  건너뛴 URL (Google News URLs): {skipped_urls}개&quot;)

        if urls:
            print(f&quot;  첫 번째 디코딩된 URL 샘플: {urls[0]}&quot;)

        print(&quot;-&quot; * 30)

    except Exception as e:
        print(f&quot;❌ CSV 파일 읽기 실패: {e}&quot;)
        return None, None

    total_start_time = time.time()
    results = []
    successful_count = 0
    failed_count = 0

    for i, url in enumerate(urls):
        result = process_single_url(url, i, total_urls)
        results.append(result)

        if result[&quot;status&quot;] == &quot;success&quot;:
            successful_count += 1
        else:
            failed_count += 1

        if (i + 1) % 10 == 0 or (i + 1) == total_urls:
            print(
                f&quot;진행: {i + 1}/{total_urls} &quot;
                f&quot;({(i + 1)/total_urls*100:.1f}%) &quot;
                f&quot;성공: {successful_count}, 실패: {failed_count}&quot;
            )

    total_processing_time = time.time() - total_start_time
    print_summary(results, total_processing_time, skipped_urls)

    saved_file = save_results_to_csv(results)
    return results, saved_file


def main():
    csv_file_path = r&quot;C:\Users\yhsur\Downloads\특징주\sample_data\Combined_sample_data_500_decoded_2025-05-20_224740.csv&quot;
    results, saved_file = process_urls_from_csv(csv_file_path, url_column=&quot;decoded_url&quot;)

    if results:
        print(f&quot;\n  저장된 파일: {saved_file}&quot;)

        print(&quot;\n  처리 결과 샘플:&quot;)
        for i, result in enumerate(results[:3]):
            print(f&quot;\n[{i+1}] {result['url'][:50]}...&quot;)
            print(f&quot;    상태: {result['status']}&quot;)
            print(f&quot;    제목: {result['title']}&quot;)
            print(f&quot;    처리시간: {result['processing_time']:.2f}초&quot;)
            if result[&quot;status&quot;] == &quot;failed&quot;:
                print(f&quot;    오류: {result['error_message']}&quot;)


if __name__ == &quot;__main__&quot;:
    main()
&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;div class=&quot;code-act&quot;&gt;&lt;button class=&quot;copy-btn&quot;&gt;  코드 복사&lt;/button&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;!-- ★ 코드 박스 끝 --&gt; &lt;!-- ★ 토글 &amp; 복사 스크립트 --&gt;
&lt;script&gt;
document.addEventListener(&quot;DOMContentLoaded&quot;, () =&gt; {

  /* 토글 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;toggle-btn&quot;)) return;
    const box = e.target.closest(&quot;.code-box&quot;);
    const cont = box.querySelector(&quot;.code-ct&quot;);
    const act = box.querySelector(&quot;.code-act&quot;);
    const open = cont.style.display === &quot;block&quot;;
    cont.style.display = act.style.display = open ? &quot;none&quot; : &quot;block&quot;;
    e.target.textContent = open ? &quot;  펼치기&quot; : &quot;  접기&quot;;
  });

  /* 복사 */
  document.body.addEventListener(&quot;click&quot;, e =&gt; {
    if (!e.target.classList.contains(&quot;copy-btn&quot;)) return;
    const btn = e.target;
    const code = btn.closest(&quot;.code-box&quot;).querySelector(&quot;code&quot;).innerText;
    if (navigator.clipboard &amp;&amp; window.isSecureContext) {
      navigator.clipboard.writeText(code).then(() =&gt; flash(btn))
                                         .catch(() =&gt; fallback(code, btn));
    } else {
      fallback(code, btn);
    }
  });

  const flash = btn =&gt; {
    const orig = btn.textContent;
    btn.textContent = &quot;✅ 복사됨!&quot;;
    setTimeout(() =&gt; (btn.textContent = orig), 2000);
  };

  const fallback = (text, btn) =&gt; {
    const ta = document.createElement(&quot;textarea&quot;);
    ta.value = text;
    ta.style.position = &quot;fixed&quot;;
    ta.style.top = &quot;-1000px&quot;;
    document.body.appendChild(ta);
    ta.focus();
    ta.select();
    try {
      document.execCommand(&quot;copy&quot;);
      flash(btn);
    } catch {
      alert(&quot;복사 실패   – 브라우저가 클립보드를 차단했습니다.&quot;);
    }
    document.body.removeChild(ta);
  };
});
&lt;/script&gt;
&lt;/div&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;pre id=&quot;code_1748173061076&quot; class=&quot;shell&quot; data-ke-language=&quot;shell&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;진행: 500/500 (100.0%) 성공: 407, 실패: 93

======= SUMMARY =======
Total URLs processed: 500
Workers used: 1
Successfully decoded: 407 (81.4%)
Failed to decode: 93 (18.6%)
Total errors: 93 (18.6%)
Skipped (Google News URLs): 0 (0.0%)

----- TIMING INFORMATION -----
Total processing time: 4:20.64
Average processing time per URL: 0.52 seconds
Average processing time per decoded URL: 0.52 seconds
Fastest URL processing time: 0.05 seconds
Slowest URL processing time: 5.26 seconds

Process completed successfully. Results saved to CSV file.
✅ 결과가 'url_processing_results_20250525_131353.csv' 파일에 저장되었습니다.

  저장된 파일: url_processing_results_20250525_131353.csv&lt;/code&gt;&lt;/pre&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  성능 평가 및 결과&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Claude를 사용해 평가 결과의 분석 및 시각화를 진행하였다.&lt;/p&gt;
&lt;div id=&quot;code_1748178050462&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;URL 처리 결과 분석 보고서&amp;lt;/title&amp;gt;
    &amp;lt;script src=&amp;quot;https://cdnjs.cloudflare.com/ajax/libs/Chart.js/3.9.1/chart.min.js&amp;quot;&amp;gt;&amp;lt;/script&amp;gt;
    &amp;lt;style&amp;gt;
        .url-analysis-report * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        
        .url-analysis-report {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            background: #f8f9fa;
            min-height: 100vh;
        }
        
        .url-analysis-container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            min-height: 100vh;
            box-shadow: 0 0 20px rgba(0,0,0,0.1);
        }
        
        .url-analysis-header {
            background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
            color: white;
            padding: 40px 30px;
            text-align: center;
        }
        
        .url-analysis-header h1 {
            font-size: 2.5rem;
            margin-bottom: 10px;
            font-weight: 300;
        }
        
        .url-analysis-header .subtitle {
            font-size: 1.1rem;
            opacity: 0.9;
        }
        
        .url-analysis-header .date {
            margin-top: 20px;
            font-size: 0.9rem;
            opacity: 0.8;
        }
        
        .url-analysis-content {
            padding: 15px 20px;
        }
        
        .url-analysis-summary {
            background: #ffffff;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 15px;
            border: 1px solid #e9ecef;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        
        .url-analysis-summary h2 {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1.3rem;
        }
        
        .url-summary-info {
            background: white;
            padding: 12px;
            border-radius: 6px;
            margin-bottom: 10px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        
        .url-summary-info h3 {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1.1rem;
        }
        
        .url-summary-grid {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin-bottom: 10px;
        }
        
        .url-summary-card {
            background: #f8f9fa;
            padding: 12px;
            border-radius: 6px;
            border: 1px solid #dee2e6;
        }
        
        .url-summary-card.success { background: #f0f8f0; border-color: #c3e6c3; }
        .url-summary-card.info { background: #f0f6ff; border-color: #c3d9ff; }
        .url-summary-card.purple { background: #f8f0ff; border-color: #e6c3ff; }
        
        .url-summary-label {
            color: #666;
            font-size: 0.9rem;
            margin-bottom: 5px;
        }
        
        .url-summary-value {
            font-size: 1.4rem;
            font-weight: bold;
        }
        
        .url-summary-value.info { color: #495057; }
        .url-summary-value.success { color: #495057; }
        .url-summary-value.info2 { color: #495057; }
        .url-summary-value.purple { color: #495057; }
        
        .url-chart-section {
            margin-top: 10px;
        }
        
        .url-chart-title {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1rem;
            font-weight: 600;
        }
        
        .url-chart-container {
            background: #fff;
            padding: 20px;
            border-radius: 8px;
            border: 1px solid #e9ecef;
            height: 300px;
            position: relative;
        }
        
        .url-key-metrics {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin-bottom: 10px;
        }
        
        .url-metric-card {
            background: white;
            padding: 10px;
            border-radius: 6px;
            text-align: center;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            transition: transform 0.3s ease;
        }
        
        .url-metric-card:hover {
            transform: translateY(-5px);
        }
        
        .url-metric-value {
            font-size: 1.8rem;
            font-weight: bold;
            margin-bottom: 3px;
        }
        
        .url-metric-label {
            color: #666;
            font-size: 0.9rem;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        
        .url-success { color: #495057; }
        .url-info { color: #495057; }
        .url-warning { color: #495057; }
        .url-danger { color: #495057; }
        
        .url-section {
            margin-bottom: 20px;
        }
        
        .url-section h2 {
            color: #2c3e50;
            border-bottom: 2px solid #6c757d;
            padding-bottom: 5px;
            margin-bottom: 10px;
            font-size: 1.2rem;
        }
        
        .url-alert {
            background: #fff8dc;
            border: 1px solid #f0e68c;
            padding: 8px;
            border-radius: 4px;
            margin-bottom: 10px;
        }
        
        .url-alert-danger {
            background: #f8d7da;
            border: 1px solid #f5c6cb;
            border-left: 4px solid #dc3545;
        }
        
        .url-status-grid {
            display: grid;
            grid-template-columns: repeat(3, 1fr);
            gap: 6px;
            margin-bottom: 10px;
        }
        
        .url-status-item {
            text-align: center;
            padding: 6px;
            border-radius: 4px;
            color: white;
        }
        
        .url-status-success {
            background: #d4edda;
            color: #2c3e50;
        }
        
        .url-status-failed {
            background: #f8d7da;
            color: #2c3e50;
        }
        
        .url-status-unknown {
            background: #e2e3e5;
            color: #2c3e50;
        }
        
        .url-problem-domain {
            background: #faf8f8;
            border: 1px solid #e9d7d7;
            border-radius: 6px;
            padding: 12px;
            margin: 8px 0;
        }
        
        .url-problem-domain h3 {
            color: #495057;
            margin-bottom: 8px;
            font-size: 1.1rem;
        }
        
        .url-problem-stats {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin: 8px 0;
        }
        
        .url-problem-stat {
            background: white;
            padding: 8px;
            border-radius: 4px;
            text-align: center;
            border: 1px solid #dee2e6;
        }
        
        .url-problem-stat-value {
            font-size: 1.3rem;
            font-weight: bold;
            color: #495057;
        }
        
        .url-problem-stat-label {
            font-size: 0.85rem;
            color: #666;
            margin-top: 5px;
        }
        
        .url-error-details {
            background: #f8f9fa;
            padding: 10px;
            border-radius: 6px;
            margin-top: 8px;
        }
        
        .url-error-item {
            background: white;
            padding: 6px;
            margin: 4px 0;
            border-radius: 3px;
            border: 1px solid #dee2e6;
            font-family: monospace;
            font-size: 0.8rem;
        }
        
        .url-insights {
            background: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
            margin-top: 25px;
            border: 1px solid #dee2e6;
        }
        
        .url-insights h3 {
            color: #495057;
            margin-bottom: 15px;
            font-size: 1.2rem;
        }
        
        .url-insights ul {
            list-style: none;
        }
        
        .url-insights li {
            margin: 8px 0;
            padding-left: 15px;
            position: relative;
        }
        
        .url-insights li:before {
            content: &amp;quot;&amp;bull;&amp;quot;;
            position: absolute;
            left: 0;
            color: #6c757d;
        }
        
        .url-footer {
            background: #2c3e50;
            color: white;
            padding: 20px 30px;
            text-align: center;
        }
        
        @media (max-width: 768px) {
            .url-key-metrics,
            .url-problem-stats {
                grid-template-columns: repeat(2, 1fr);
            }
            
            .url-status-grid {
                grid-template-columns: repeat(3, 1fr);
                gap: 4px;
            }
            
            .url-summary-grid {
                grid-template-columns: repeat(2, 1fr);
            }
            
            .url-analysis-summary h2 {
                font-size: 1.1rem;
            }
            
            .url-analysis-content {
                padding: 10px 15px;
            }
        }
        
        @media (max-width: 480px) {
            .url-key-metrics,
            .url-problem-stats,
            .url-summary-grid {
                grid-template-columns: 1fr;
            }
            
            .url-status-grid {
                grid-template-columns: repeat(3, 1fr);
                gap: 3px;
            }
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div class=&amp;quot;url-analysis-report&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;url-analysis-container&amp;quot;&amp;gt;
            &amp;lt;div class=&amp;quot;url-analysis-content&amp;quot;&amp;gt;
                &amp;lt;!-- 요약 --&amp;gt;
                &amp;lt;div class=&amp;quot;url-analysis-summary&amp;quot;&amp;gt;
                    &amp;lt;h2&amp;gt;  분석 요약&amp;lt;/h2&amp;gt;
                    
                    &amp;lt;!-- 요약 정보 --&amp;gt;
                    &amp;lt;div class=&amp;quot;url-summary-info&amp;quot;&amp;gt;
                        &amp;lt;h3&amp;gt;  요약 정보&amp;lt;/h3&amp;gt;
                        &amp;lt;div class=&amp;quot;url-summary-grid&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-summary-card&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-label&amp;quot;&amp;gt;총 레코드 수&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-value info&amp;quot;&amp;gt;500&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-summary-card success&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-label&amp;quot;&amp;gt;성공률&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-value success&amp;quot;&amp;gt;81.4%&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-summary-card info&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-label&amp;quot;&amp;gt;평균 처리 시간&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-value info2&amp;quot;&amp;gt;0.521초&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-summary-card purple&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-label&amp;quot;&amp;gt;중앙값 처리 시간&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-summary-value purple&amp;quot;&amp;gt;0.314초&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;!-- 처리 시간 분포 차트 --&amp;gt;
                        &amp;lt;div class=&amp;quot;url-chart-section&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-chart-title&amp;quot;&amp;gt;처리 시간 분포&amp;lt;/div&amp;gt;
                            &amp;lt;div style=&amp;quot;background: #fff; padding: 10px; border-radius: 6px; border: 1px solid #e9ecef;&amp;quot;&amp;gt;
                                &amp;lt;div style=&amp;quot;display: flex; align-items: end; justify-content: space-between; height: 120px; border-bottom: 2px solid #dee2e6; border-left: 2px solid #dee2e6; padding: 5px 0; position: relative;&amp;quot;&amp;gt;
                                    &amp;lt;!-- Y축 눈금 --&amp;gt;
                                    &amp;lt;div style=&amp;quot;position: absolute; left: -25px; top: 0; height: 100%; display: flex; flex-direction: column; justify-content: space-between; color: #666; font-size: 0.65rem;&amp;quot;&amp;gt;
                                        &amp;lt;span&amp;gt;120&amp;lt;/span&amp;gt;
                                        &amp;lt;span&amp;gt;80&amp;lt;/span&amp;gt;
                                        &amp;lt;span&amp;gt;40&amp;lt;/span&amp;gt;
                                        &amp;lt;span&amp;gt;0&amp;lt;/span&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 90%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;0.0-0.5&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 60%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;0.5-1.0&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 40%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;1.0-1.5&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 30%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;1.5-2.0&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 20%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;2.0-2.5&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                    &amp;lt;div style=&amp;quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&amp;quot;&amp;gt;
                                        &amp;lt;div style=&amp;quot;background: #6c757d; width: 30px; height: 10%; border-radius: 3px 3px 0 0;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
                                        &amp;lt;div style=&amp;quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&amp;quot;&amp;gt;2.5초+&amp;lt;/div&amp;gt;
                                    &amp;lt;/div&amp;gt;
                                &amp;lt;/div&amp;gt;
                                &amp;lt;div style=&amp;quot;text-align: center; margin-top: 5px; color: #666; font-size: 0.8rem; font-weight: bold;&amp;quot;&amp;gt;처리 시간 (초)&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;

                    &amp;lt;div class=&amp;quot;url-key-metrics&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;url-metric-card&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-value url-info&amp;quot;&amp;gt;500&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-label&amp;quot;&amp;gt;총 처리 건수&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-metric-card&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-value url-danger&amp;quot;&amp;gt;93&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-label&amp;quot;&amp;gt;실패 건수&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-metric-card&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-value url-danger&amp;quot;&amp;gt;18.6%&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-label&amp;quot;&amp;gt;실패율&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-metric-card&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-value url-warning&amp;quot;&amp;gt;2개&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-metric-label&amp;quot;&amp;gt;주요 문제 도메인&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                    
                    &amp;lt;div class=&amp;quot;url-alert&amp;quot;&amp;gt;
                        &amp;lt;strong&amp;gt;주요 문제:&amp;lt;/strong&amp;gt; 전체 실패의 96.8%(90건)가 biz.chosun.com, www.msn.com 두 도메인에서 발생
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;

                &amp;lt;!-- 처리 상태 분석 --&amp;gt;
                &amp;lt;div class=&amp;quot;url-section&amp;quot;&amp;gt;
                    &amp;lt;h2&amp;gt;  전체 처리 현황&amp;lt;/h2&amp;gt;
                    &amp;lt;div class=&amp;quot;url-status-grid&amp;quot;&amp;gt;
                        &amp;lt;div class=&amp;quot;url-status-item url-status-success&amp;quot;&amp;gt;
                            &amp;lt;h3 style=&amp;quot;font-size: 0.9rem; margin: 0;&amp;quot;&amp;gt;성공&amp;lt;/h3&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&amp;quot;&amp;gt;407건&amp;lt;/div&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 0.85rem;&amp;quot;&amp;gt;81.4%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-status-item url-status-failed&amp;quot;&amp;gt;
                            &amp;lt;h3 style=&amp;quot;font-size: 0.9rem; margin: 0;&amp;quot;&amp;gt;실패&amp;lt;/h3&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&amp;quot;&amp;gt;93건&amp;lt;/div&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 0.85rem;&amp;quot;&amp;gt;18.6%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-status-item url-status-unknown&amp;quot;&amp;gt;
                            &amp;lt;h3 style=&amp;quot;font-size: 0.9rem; margin: 0;&amp;quot;&amp;gt;알수없음&amp;lt;/h3&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&amp;quot;&amp;gt;0건&amp;lt;/div&amp;gt;
                            &amp;lt;div style=&amp;quot;font-size: 0.85rem;&amp;quot;&amp;gt;0%&amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;

                &amp;lt;!-- 주요 문제 도메인 분석 --&amp;gt;
                &amp;lt;div class=&amp;quot;url-section&amp;quot;&amp;gt;
                    &amp;lt;h2&amp;gt;  주요 문제 도메인 상세 분석&amp;lt;/h2&amp;gt;
                    
                    &amp;lt;div class=&amp;quot;url-problem-domain&amp;quot;&amp;gt;
                        &amp;lt;h3&amp;gt;1. biz.chosun.com&amp;lt;/h3&amp;gt;
                        &amp;lt;div class=&amp;quot;url-problem-stats&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;62건&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;총 시도&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;62건&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;실패&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;0%&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;성공률&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;66.7%&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;전체 실패 중 비율&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-error-details&amp;quot;&amp;gt;
                            &amp;lt;strong&amp;gt;주요 오류:&amp;lt;/strong&amp;gt;
                            &amp;lt;div class=&amp;quot;url-error-item&amp;quot;&amp;gt;Empty content extracted - 모든 시도 콘텐츠 추출 실패&amp;lt;/div&amp;gt;
                            &amp;lt;p style=&amp;quot;margin-top: 8px; color: #666; font-size: 0.9rem;&amp;quot;&amp;gt;
                                &amp;lt;strong&amp;gt;원인:&amp;lt;/strong&amp;gt; JavaScript 기반 동적 로딩, 접근 제한(Cloudflare, bot detection) 추정
                            &amp;lt;/p&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;

                    &amp;lt;div class=&amp;quot;url-problem-domain&amp;quot;&amp;gt;
                        &amp;lt;h3&amp;gt;2. www.msn.com&amp;lt;/h3&amp;gt;
                        &amp;lt;div class=&amp;quot;url-problem-stats&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;28건&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;총 시도&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;28건&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;실패&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;0%&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;성공률&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-problem-stat&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-value&amp;quot;&amp;gt;30.1%&amp;lt;/div&amp;gt;
                                &amp;lt;div class=&amp;quot;url-problem-stat-label&amp;quot;&amp;gt;전체 실패 중 비율&amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        &amp;lt;div class=&amp;quot;url-error-details&amp;quot;&amp;gt;
                            &amp;lt;strong&amp;gt;주요 오류:&amp;lt;/strong&amp;gt;
                            &amp;lt;div class=&amp;quot;url-error-item&amp;quot;&amp;gt;Empty content extracted - 모든 시도 콘텐츠 추출 실패&amp;lt;/div&amp;gt;
                            &amp;lt;div class=&amp;quot;url-error-item&amp;quot;&amp;gt;Failed to download page - 일부 요청 페이지 다운로드 실패&amp;lt;/div&amp;gt;
                            &amp;lt;p style=&amp;quot;margin-top: 8px; color: #666; font-size: 0.9rem;&amp;quot;&amp;gt;
                                &amp;lt;strong&amp;gt;원인:&amp;lt;/strong&amp;gt; Microsoft 서비스 강력한 bot detection, 지역별 접근 제한
                            &amp;lt;/p&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;

                    &amp;lt;div class=&amp;quot;url-alert&amp;quot;&amp;gt;
                        &amp;lt;strong&amp;gt;기타 실패:&amp;lt;/strong&amp;gt; 나머지 3건 - 네트워크 일시적 장애, 페이지 구조 변경 추정
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;

                &amp;lt;!-- 오류 패턴 분석 --&amp;gt;
                &amp;lt;div class=&amp;quot;url-section&amp;quot;&amp;gt;
                    &amp;lt;h2&amp;gt;  오류 패턴 분석&amp;lt;/h2&amp;gt;
                    
                    &amp;lt;div style=&amp;quot;background: #f8f9fa; padding: 25px; border-radius: 10px;&amp;quot;&amp;gt;
                        &amp;lt;h3 style=&amp;quot;color: #dc3545; margin-bottom: 15px;&amp;quot;&amp;gt;오류 유형별 분포&amp;lt;/h3&amp;gt;
                        
                        &amp;lt;div style=&amp;quot;display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px;&amp;quot;&amp;gt;
                            &amp;lt;div style=&amp;quot;background: white; padding: 20px; border-radius: 8px; border-left: 4px solid #dc3545;&amp;quot;&amp;gt;
                                &amp;lt;h4&amp;gt;Empty content extracted&amp;lt;/h4&amp;gt;
                                &amp;lt;div style=&amp;quot;font-size: 1.3rem; font-weight: bold; color: #495057;&amp;quot;&amp;gt;87건 (93.5%)&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;font-size: 0.85rem; color: #666; margin-top: 3px;&amp;quot;&amp;gt;
                                    콘텐츠 추출 실패 - JavaScript 렌더링 필요, 접근 차단
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                            
                            &amp;lt;div style=&amp;quot;background: white; padding: 20px; border-radius: 8px; border-left: 4px solid #ffc107;&amp;quot;&amp;gt;
                                &amp;lt;h4&amp;gt;Failed to download page&amp;lt;/h4&amp;gt;
                                &amp;lt;div style=&amp;quot;font-size: 1.3rem; font-weight: bold; color: #495057;&amp;quot;&amp;gt;6건 (6.5%)&amp;lt;/div&amp;gt;
                                &amp;lt;p style=&amp;quot;font-size: 0.85rem; color: #666; margin-top: 3px;&amp;quot;&amp;gt;
                                    페이지 다운로드 실패 - 네트워크 오류, 서버 응답 없음
                                &amp;lt;/p&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;

                &amp;lt;!-- 인코딩/문자 문제 --&amp;gt;
                &amp;lt;div class=&amp;quot;url-section&amp;quot;&amp;gt;
                    &amp;lt;h2&amp;gt;  인코딩 및 문자 처리 문제&amp;lt;/h2&amp;gt;
                    
                    &amp;lt;div class=&amp;quot;url-alert&amp;quot;&amp;gt;
                        &amp;lt;strong&amp;gt;발견된 문제:&amp;lt;/strong&amp;gt; 일부 성공 데이터에서 특수문자 및 인코딩 문제 발견
                    &amp;lt;/div&amp;gt;
                    
                    &amp;lt;div style=&amp;quot;background: #f8f9fa; padding: 20px; border-radius: 10px;&amp;quot;&amp;gt;
                        &amp;lt;h4&amp;gt;주요 인코딩 문제:&amp;lt;/h4&amp;gt;
                        &amp;lt;ul style=&amp;quot;margin: 15px 0; padding-left: 20px;&amp;quot;&amp;gt;
                            &amp;lt;li&amp;gt;한글 깨짐 현상 (UTF-8 vs EUC-KR 인코딩 충돌)&amp;lt;/li&amp;gt;
                            &amp;lt;li&amp;gt;특수문자(�) 표시 - 원본 사이트의 인코딩 문제&amp;lt;/li&amp;gt;
                            &amp;lt;li&amp;gt;HTML 엔티티 미변환 (&amp;amp;amp;, &amp;amp;lt;, &amp;amp;gt; 등)&amp;lt;/li&amp;gt;
                            &amp;lt;li&amp;gt;줄바꿈 문자 처리 문제 (\r\n, \n 혼재)&amp;lt;/li&amp;gt;
                        &amp;lt;/ul&amp;gt;
                        
                        &amp;lt;p style=&amp;quot;background: white; padding: 12px; border-radius: 6px; border: 1px solid #dee2e6;&amp;quot;&amp;gt;
                            &amp;lt;strong&amp;gt;권장 해결책:&amp;lt;/strong&amp;gt; 
                            chardet 라이브러리 자동 인코딩 감지, HTML 파싱 전 인코딩 정규화, 후처리 특수문자 정리 필요
                        &amp;lt;/p&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;

    &amp;lt;script&amp;gt;
        // Chart.js 코드 제거 - CSS 차트로 대체됨
    &amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;script src=&quot;https://cdnjs.cloudflare.com/ajax/libs/Chart.js/3.9.1/chart.min.js&quot;&gt;&lt;/script&gt;
&lt;style&gt;
        .url-analysis-report * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        
        .url-analysis-report {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            background: #f8f9fa;
            min-height: 100vh;
        }
        
        .url-analysis-container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            min-height: 100vh;
            box-shadow: 0 0 20px rgba(0,0,0,0.1);
        }
        
        .url-analysis-header {
            background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
            color: white;
            padding: 40px 30px;
            text-align: center;
        }
        
        .url-analysis-header h1 {
            font-size: 2.5rem;
            margin-bottom: 10px;
            font-weight: 300;
        }
        
        .url-analysis-header .subtitle {
            font-size: 1.1rem;
            opacity: 0.9;
        }
        
        .url-analysis-header .date {
            margin-top: 20px;
            font-size: 0.9rem;
            opacity: 0.8;
        }
        
        .url-analysis-content {
            padding: 15px 20px;
        }
        
        .url-analysis-summary {
            background: #ffffff;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 15px;
            border: 1px solid #e9ecef;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        
        .url-analysis-summary h2 {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1.3rem;
        }
        
        .url-summary-info {
            background: white;
            padding: 12px;
            border-radius: 6px;
            margin-bottom: 10px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        
        .url-summary-info h3 {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1.1rem;
        }
        
        .url-summary-grid {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin-bottom: 10px;
        }
        
        .url-summary-card {
            background: #f8f9fa;
            padding: 12px;
            border-radius: 6px;
            border: 1px solid #dee2e6;
        }
        
        .url-summary-card.success { background: #f0f8f0; border-color: #c3e6c3; }
        .url-summary-card.info { background: #f0f6ff; border-color: #c3d9ff; }
        .url-summary-card.purple { background: #f8f0ff; border-color: #e6c3ff; }
        
        .url-summary-label {
            color: #666;
            font-size: 0.9rem;
            margin-bottom: 5px;
        }
        
        .url-summary-value {
            font-size: 1.4rem;
            font-weight: bold;
        }
        
        .url-summary-value.info { color: #495057; }
        .url-summary-value.success { color: #495057; }
        .url-summary-value.info2 { color: #495057; }
        .url-summary-value.purple { color: #495057; }
        
        .url-chart-section {
            margin-top: 10px;
        }
        
        .url-chart-title {
            color: #2c3e50;
            margin-bottom: 10px;
            font-size: 1rem;
            font-weight: 600;
        }
        
        .url-chart-container {
            background: #fff;
            padding: 20px;
            border-radius: 8px;
            border: 1px solid #e9ecef;
            height: 300px;
            position: relative;
        }
        
        .url-key-metrics {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin-bottom: 10px;
        }
        
        .url-metric-card {
            background: white;
            padding: 10px;
            border-radius: 6px;
            text-align: center;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            transition: transform 0.3s ease;
        }
        
        .url-metric-card:hover {
            transform: translateY(-5px);
        }
        
        .url-metric-value {
            font-size: 1.8rem;
            font-weight: bold;
            margin-bottom: 3px;
        }
        
        .url-metric-label {
            color: #666;
            font-size: 0.9rem;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        
        .url-success { color: #495057; }
        .url-info { color: #495057; }
        .url-warning { color: #495057; }
        .url-danger { color: #495057; }
        
        .url-section {
            margin-bottom: 20px;
        }
        
        .url-section h2 {
            color: #2c3e50;
            border-bottom: 2px solid #6c757d;
            padding-bottom: 5px;
            margin-bottom: 10px;
            font-size: 1.2rem;
        }
        
        .url-alert {
            background: #fff8dc;
            border: 1px solid #f0e68c;
            padding: 8px;
            border-radius: 4px;
            margin-bottom: 10px;
        }
        
        .url-alert-danger {
            background: #f8d7da;
            border: 1px solid #f5c6cb;
            border-left: 4px solid #dc3545;
        }
        
        .url-status-grid {
            display: grid;
            grid-template-columns: repeat(3, 1fr);
            gap: 6px;
            margin-bottom: 10px;
        }
        
        .url-status-item {
            text-align: center;
            padding: 6px;
            border-radius: 4px;
            color: white;
        }
        
        .url-status-success {
            background: #d4edda;
            color: #2c3e50;
        }
        
        .url-status-failed {
            background: #f8d7da;
            color: #2c3e50;
        }
        
        .url-status-unknown {
            background: #e2e3e5;
            color: #2c3e50;
        }
        
        .url-problem-domain {
            background: #faf8f8;
            border: 1px solid #e9d7d7;
            border-radius: 6px;
            padding: 12px;
            margin: 8px 0;
        }
        
        .url-problem-domain h3 {
            color: #495057;
            margin-bottom: 8px;
            font-size: 1.1rem;
        }
        
        .url-problem-stats {
            display: grid;
            grid-template-columns: repeat(4, 1fr);
            gap: 8px;
            margin: 8px 0;
        }
        
        .url-problem-stat {
            background: white;
            padding: 8px;
            border-radius: 4px;
            text-align: center;
            border: 1px solid #dee2e6;
        }
        
        .url-problem-stat-value {
            font-size: 1.3rem;
            font-weight: bold;
            color: #495057;
        }
        
        .url-problem-stat-label {
            font-size: 0.85rem;
            color: #666;
            margin-top: 5px;
        }
        
        .url-error-details {
            background: #f8f9fa;
            padding: 10px;
            border-radius: 6px;
            margin-top: 8px;
        }
        
        .url-error-item {
            background: white;
            padding: 6px;
            margin: 4px 0;
            border-radius: 3px;
            border: 1px solid #dee2e6;
            font-family: monospace;
            font-size: 0.8rem;
        }
        
        .url-insights {
            background: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
            margin-top: 25px;
            border: 1px solid #dee2e6;
        }
        
        .url-insights h3 {
            color: #495057;
            margin-bottom: 15px;
            font-size: 1.2rem;
        }
        
        .url-insights ul {
            list-style: none;
        }
        
        .url-insights li {
            margin: 8px 0;
            padding-left: 15px;
            position: relative;
        }
        
        .url-insights li:before {
            content: &quot;•&quot;;
            position: absolute;
            left: 0;
            color: #6c757d;
        }
        
        .url-footer {
            background: #2c3e50;
            color: white;
            padding: 20px 30px;
            text-align: center;
        }
        
        @media (max-width: 768px) {
            .url-key-metrics,
            .url-problem-stats {
                grid-template-columns: repeat(2, 1fr);
            }
            
            .url-status-grid {
                grid-template-columns: repeat(3, 1fr);
                gap: 4px;
            }
            
            .url-summary-grid {
                grid-template-columns: repeat(2, 1fr);
            }
            
            .url-analysis-summary h2 {
                font-size: 1.1rem;
            }
            
            .url-analysis-content {
                padding: 10px 15px;
            }
        }
        
        @media (max-width: 480px) {
            .url-key-metrics,
            .url-problem-stats,
            .url-summary-grid {
                grid-template-columns: 1fr;
            }
            
            .url-status-grid {
                grid-template-columns: repeat(3, 1fr);
                gap: 3px;
            }
        }
    &lt;/style&gt;
&lt;div class=&quot;url-analysis-report&quot;&gt;
&lt;div class=&quot;url-analysis-container&quot;&gt;
&lt;div class=&quot;url-analysis-content&quot;&gt;&lt;!-- 요약 --&gt;
&lt;div class=&quot;url-analysis-summary&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  분석 요약&lt;/h2&gt;
&lt;!-- 요약 정보 --&gt;
&lt;div class=&quot;url-summary-info&quot;&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  요약 정보&lt;/h3&gt;
&lt;div class=&quot;url-summary-grid&quot;&gt;
&lt;div class=&quot;url-summary-card&quot;&gt;
&lt;div class=&quot;url-summary-label&quot;&gt;총 레코드 수&lt;/div&gt;
&lt;div class=&quot;url-summary-value info&quot;&gt;500&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-summary-card success&quot;&gt;
&lt;div class=&quot;url-summary-label&quot;&gt;성공률&lt;/div&gt;
&lt;div class=&quot;url-summary-value success&quot;&gt;81.4%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-summary-card info&quot;&gt;
&lt;div class=&quot;url-summary-label&quot;&gt;평균 처리 시간&lt;/div&gt;
&lt;div class=&quot;url-summary-value info2&quot;&gt;0.521초&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-summary-card purple&quot;&gt;
&lt;div class=&quot;url-summary-label&quot;&gt;중앙값 처리 시간&lt;/div&gt;
&lt;div class=&quot;url-summary-value purple&quot;&gt;0.314초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 처리 시간 분포 차트 --&gt;
&lt;div class=&quot;url-chart-section&quot;&gt;
&lt;div class=&quot;url-chart-title&quot;&gt;처리 시간 분포&lt;/div&gt;
&lt;div style=&quot;background: #fff; padding: 10px; border-radius: 6px; border: 1px solid #e9ecef;&quot;&gt;
&lt;div style=&quot;display: flex; align-items: end; justify-content: space-between; height: 120px; border-bottom: 2px solid #dee2e6; border-left: 2px solid #dee2e6; padding: 5px 0; position: relative;&quot;&gt;&lt;!-- Y축 눈금 --&gt;
&lt;div style=&quot;position: absolute; left: -25px; top: 0; height: 100%; display: flex; flex-direction: column; justify-content: space-between; color: #666; font-size: 0.65rem;&quot;&gt;&lt;span&gt;120&lt;/span&gt; &lt;span&gt;80&lt;/span&gt; &lt;span&gt;40&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 90%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;0.0-0.5&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 60%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;0.5-1.0&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 40%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;1.0-1.5&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 30%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;1.5-2.0&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 20%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;2.0-2.5&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;display: flex; flex-direction: column; align-items: center; justify-content: end; height: 100%;&quot;&gt;
&lt;div style=&quot;background: #6c757d; width: 30px; height: 10%; border-radius: 3px 3px 0 0;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div style=&quot;font-size: 0.7rem; color: #666; margin-top: 5px; text-align: center;&quot;&gt;2.5초+&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div style=&quot;text-align: center; margin-top: 5px; color: #666; font-size: 0.8rem; font-weight: bold;&quot;&gt;처리 시간 (초)&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-key-metrics&quot;&gt;
&lt;div class=&quot;url-metric-card&quot;&gt;
&lt;div class=&quot;url-metric-value url-info&quot;&gt;500&lt;/div&gt;
&lt;div class=&quot;url-metric-label&quot;&gt;총 처리 건수&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-metric-card&quot;&gt;
&lt;div class=&quot;url-metric-value url-danger&quot;&gt;93&lt;/div&gt;
&lt;div class=&quot;url-metric-label&quot;&gt;실패 건수&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-metric-card&quot;&gt;
&lt;div class=&quot;url-metric-value url-danger&quot;&gt;18.6%&lt;/div&gt;
&lt;div class=&quot;url-metric-label&quot;&gt;실패율&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-metric-card&quot;&gt;
&lt;div class=&quot;url-metric-value url-warning&quot;&gt;2개&lt;/div&gt;
&lt;div class=&quot;url-metric-label&quot;&gt;주요 문제 도메인&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-alert&quot;&gt;&lt;b&gt;주요 문제:&lt;/b&gt; 전체 실패의 96.8%(90건)가 biz.chosun.com, www.msn.com에서 발생&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 처리 상태 분석 --&gt;
&lt;div class=&quot;url-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  전체 처리 현황&lt;/h2&gt;
&lt;div class=&quot;url-status-grid&quot;&gt;
&lt;div class=&quot;url-status-item url-status-success&quot;&gt;
&lt;h3 style=&quot;font-size: 0.9rem; margin: 0;&quot; data-ke-size=&quot;size23&quot;&gt;성공&lt;/h3&gt;
&lt;div style=&quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&quot;&gt;407건&lt;/div&gt;
&lt;div style=&quot;font-size: 0.85rem;&quot;&gt;81.4%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-status-item url-status-failed&quot;&gt;
&lt;h3 style=&quot;font-size: 0.9rem; margin: 0;&quot; data-ke-size=&quot;size23&quot;&gt;실패&lt;/h3&gt;
&lt;div style=&quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&quot;&gt;93건&lt;/div&gt;
&lt;div style=&quot;font-size: 0.85rem;&quot;&gt;18.6%&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-status-item url-status-unknown&quot;&gt;
&lt;h3 style=&quot;font-size: 0.9rem; margin: 0;&quot; data-ke-size=&quot;size23&quot;&gt;알수없음&lt;/h3&gt;
&lt;div style=&quot;font-size: 1.2rem; margin: 2px 0; font-weight: bold;&quot;&gt;0건&lt;/div&gt;
&lt;div style=&quot;font-size: 0.85rem;&quot;&gt;0%&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 주요 문제 도메인 분석 --&gt;
&lt;div class=&quot;url-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  주요 문제 도메인 상세 분석&lt;/h2&gt;
&lt;div class=&quot;url-problem-domain&quot;&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;1. biz.chosun.com&lt;/h3&gt;
&lt;div class=&quot;url-problem-stats&quot;&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;62건&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;총 시도&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;62건&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;실패&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;0%&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;성공률&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;66.7%&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;전체 실패 중 비율&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-error-details&quot;&gt;&lt;b&gt;주요 오류:&lt;/b&gt;
&lt;div class=&quot;url-error-item&quot;&gt;Empty content extracted - 모든 시도 콘텐츠 추출 실패&lt;/div&gt;
&lt;p style=&quot;margin-top: 8px; color: #666; font-size: 0.9rem;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;원인:&lt;/b&gt; JavaScript 기반 동적 로딩, 접근 제한(Cloudflare, bot detection) 추정&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-domain&quot;&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;2. www.msn.com&lt;/h3&gt;
&lt;div class=&quot;url-problem-stats&quot;&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;28건&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;총 시도&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;28건&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;실패&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;0%&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;성공률&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-problem-stat&quot;&gt;
&lt;div class=&quot;url-problem-stat-value&quot;&gt;30.1%&lt;/div&gt;
&lt;div class=&quot;url-problem-stat-label&quot;&gt;전체 실패 중 비율&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-error-details&quot;&gt;&lt;b&gt;주요 오류:&lt;/b&gt;
&lt;div class=&quot;url-error-item&quot;&gt;Empty content extracted - 모든 시도 콘텐츠 추출 실패&lt;/div&gt;
&lt;div class=&quot;url-error-item&quot;&gt;Failed to download page - 일부 요청 페이지 다운로드 실패&lt;/div&gt;
&lt;p style=&quot;margin-top: 8px; color: #666; font-size: 0.9rem;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;원인:&lt;/b&gt; Microsoft 서비스 강력한 bot detection, 지역별 접근 제한&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;url-alert&quot;&gt;&lt;b&gt;기타 실패:&lt;/b&gt; 나머지 3건 - 네트워크 일시적 장애, 페이지 구조 변경 추정&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 오류 패턴 분석 --&gt;
&lt;div class=&quot;url-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  오류 패턴 분석&lt;/h2&gt;
&lt;div style=&quot;background: #f8f9fa; padding: 25px; border-radius: 10px;&quot;&gt;
&lt;h3 style=&quot;color: #dc3545; margin-bottom: 15px;&quot; data-ke-size=&quot;size23&quot;&gt;오류 유형별 분포&lt;/h3&gt;
&lt;div style=&quot;display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px;&quot;&gt;
&lt;div style=&quot;background: white; padding: 20px; border-radius: 8px; border-left: 4px solid #dc3545;&quot;&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;Empty content extracted&lt;/h4&gt;
&lt;div style=&quot;font-size: 1.3rem; font-weight: bold; color: #495057;&quot;&gt;87건 (93.5%)&lt;/div&gt;
&lt;p style=&quot;font-size: 0.85rem; color: #666; margin-top: 3px;&quot; data-ke-size=&quot;size16&quot;&gt;콘텐츠 추출 실패 - JavaScript 렌더링 필요, 접근 차단&lt;/p&gt;
&lt;/div&gt;
&lt;div style=&quot;background: white; padding: 20px; border-radius: 8px; border-left: 4px solid #ffc107;&quot;&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;Failed to download page&lt;/h4&gt;
&lt;div style=&quot;font-size: 1.3rem; font-weight: bold; color: #495057;&quot;&gt;6건 (6.5%)&lt;/div&gt;
&lt;p style=&quot;font-size: 0.85rem; color: #666; margin-top: 3px;&quot; data-ke-size=&quot;size16&quot;&gt;페이지 다운로드 실패 - 네트워크 오류, 서버 응답 없음&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 인코딩/문자 문제 --&gt;
&lt;div class=&quot;url-section&quot;&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  인코딩 및 문자 처리 문제&lt;/h2&gt;
&lt;div class=&quot;url-alert&quot;&gt;&lt;b&gt;발견된 문제:&lt;/b&gt; 일부 성공 데이터 특수문자, 인코딩 문제 발견&lt;/div&gt;
&lt;div style=&quot;background: #f8f9fa; padding: 20px; border-radius: 10px;&quot;&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;주요 인코딩 문제:&lt;/h4&gt;
&lt;ul style=&quot;margin: 15px 0px; padding-left: 20px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;한글 깨짐 현상 (UTF-8 vs EUC-KR 인코딩 충돌)&lt;/li&gt;
&lt;li&gt;특수문자( ) 표시 - 원본 사이트의 인코딩 문제&lt;/li&gt;
&lt;li&gt;HTML 엔티티 미변환 (&amp;amp;, &amp;lt;, &amp;gt; 등)&lt;/li&gt;
&lt;li&gt;줄바꿈 문자 처리 문제 (\r\n, \n 혼재)&lt;/li&gt;
&lt;/ul&gt;
&lt;p style=&quot;background: white; padding: 12px; border-radius: 6px; border: 1px solid #dee2e6;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;권장 해결책:&lt;/b&gt; chardet 라이브러리 자동 인코딩 감지, HTML 파싱 전 인코딩 정규화, 후처리 특수문자 정리 필요&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;script&gt;
        // Chart.js 코드 제거 - CSS 차트로 대체됨
    &lt;/script&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;div&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;마무리&lt;/h2&gt;
&lt;p data-end=&quot;171&quot; data-start=&quot;63&quot; data-ke-size=&quot;size16&quot;&gt;Trafilatura는 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;정적 HTML 기사 추출에 특화된 도구&lt;/b&gt;&lt;/span&gt;다. &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Claude로 분석한 결과&lt;/b&gt;,&lt;/span&gt; 500개 기사 URL 테스트에서 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;평균 처리 시간은 0.521초, 성공률은 81.4%&lt;/span&gt;&lt;/b&gt;로 빠른 성능을 보여주었고, 추출에 실패한 웹사이트는 대부분 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;자바스크립트 기반 웹사이트 &lt;/b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;또는&lt;/span&gt;&lt;/span&gt; &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;한글 인코딩 문제가 있는 페이지&lt;/b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;로 확인되었다.&lt;/span&gt;&lt;/span&gt; Tra&lt;span style=&quot;color: #000000;&quot;&gt;filatura는 정적 구조에 최적화되어 있으며, 동적 콘텐&lt;/span&gt;츠 처리에는 한계가 있다. 다음 글에서는&lt;span style=&quot;color: #006dd7;&quot;&gt; &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;한글 인코딩 문제가 있는 페이지와 자바스크립트 기반 웹사이트에 대한 &lt;/b&gt;&lt;/span&gt;&lt;b&gt;대응 전략&lt;/b&gt;&lt;/span&gt;을 다루고자 한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;/div&gt;
&lt;div&gt;&amp;nbsp;&lt;/div&gt;</description>
      <category>Claude</category>
      <category>trafilatura</category>
      <category>기사수집자동화</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/19</guid>
      <comments>https://catalystmind.tistory.com/19#entry19comment</comments>
      <pubDate>Fri, 30 May 2025 21:03:05 +0900</pubDate>
    </item>
    <item>
      <title>Googlenewsdecoder - 병렬처리로 시간 단축하기</title>
      <link>https://catalystmind.tistory.com/17</link>
      <description>&lt;div id=&quot;code_1748138894988&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;TL;DR 템플릿&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background-color: #f8fafc;
            padding: 20px;
            margin: 0;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
&amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
    &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;GoogleNewsDecoder&amp;lt;/span&amp;gt;는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;I/O 바운드 작업&amp;lt;/span&amp;gt;으로 병렬 처리에 적합한 특성을 가짐&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;Python의 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;concurrent.futures&amp;lt;/span&amp;gt; 라이브러리를 활용해 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;ThreadPoolExecutor&amp;lt;/span&amp;gt; 기반의 병렬 처리 시스템 구현&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;submit()&amp;lt;/span&amp;gt;과 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;as_completed()&amp;lt;/span&amp;gt; 메서드를 조합한 비동기 처리를 통해 전체 작업 시간을 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;대폭 단축&amp;lt;/span&amp;gt;&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;기존 순차 처리 방식 대비 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;다중 스레드 병렬 처리&amp;lt;/span&amp;gt;로 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;성능 향상&amp;lt;/span&amp;gt; 달성&amp;lt;/li&amp;gt;

    &amp;lt;/ul&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background-color: #f8fafc;
            padding: 20px;
            margin: 0;
        }
    &lt;/style&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;GoogleNewsDecoder&lt;/span&gt;는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;I/O 바운드 작업&lt;/span&gt;으로 병렬 처리에 적합한 특성을 가짐&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;Python의 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;concurrent.futures&lt;/span&gt; 라이브러리를 활용해 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;ThreadPoolExecutor&lt;/span&gt; 기반의 병렬 처리 시스템 구현&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;submit()&lt;/span&gt;과 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;as_completed()&lt;/span&gt; 메서드를 조합한 비동기 처리를 통해 전체 작업 시간을 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;대폭 단축&lt;/span&gt;&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;기존 순차 처리 방식 대비 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;다중 스레드 병렬 처리&lt;/span&gt;로 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;성능 향상&lt;/span&gt; 달성&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;googlenewsdecoder - 병렬 처리의 필요성&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;googlenewsdecoder를 활용하여 500개의 기사를 순차적으로 처리할 경우, 기사 하나당 평균 처리 시간은 약 3초로, 전체 처리 시간은 26분 57초가 소요되었다. 원문 URL을 알아내는 데만 이렇게 긴 시간을 소비하는 것은 매우 비효율적이며 다음 작업에 필요한 시간을 감안하면 이 단계에서 소요되는 시간을 더 단축할 필요가 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;googlenewsdecoder는 다음과 같은 병렬 처리에 적합한 특징을 가지고 있다.&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;I/O 바운드 작업:&lt;/b&gt; googlenewsdecoder의 주요 작업은 네트워크 요청으로 이루어짐.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;독립적인 작업:&lt;/b&gt; 각 URL 처리 작업은 독립적이며, 이전 처리작업 결과가 다음 작업에 영향을 미치지 않음.&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;concurrent.futures - 병렬작업을 위한 python 패키지&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Python의 concurrent.futures는 멀티스레드 또는 멀티프로세스를 간편하게 구현할 수 있는 표준 라이브러리로, 멀티스레드의 경우 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;웹 크롤링&lt;/b&gt;&lt;/span&gt;이나 파일 I/O와 같은 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;I/O 바운드 작업의 병렬 처리에 매우 적합&lt;/span&gt;&lt;/b&gt;하다. 따라서, 이번 글에서는 멀티스레드를 이용한 병렬처리를 다루고자 한다.&lt;/p&gt;
&lt;div id=&quot;code_1747829583126&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;style&amp;gt;
        .comparison-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .comparison-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .comparison-table th:first-child {
            width: 15%;
        }
        
        .comparison-table th:nth-child(2),
        .comparison-table th:nth-child(3) {
            width: 42.5%;
        }
        
        .comparison-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .comparison-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .comparison-table td {
            border: 1px solid #ddd;
            padding: 16px 20px;
            vertical-align: middle;
            line-height: 1.6;
            text-align: center;
        }
        
        .feature-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
            vertical-align: middle;
        }
        
        .code {
            font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
            background-color: #e8f4fd;
            padding: 2px 5px;
            border-radius: 3px;
            color: #1976d2;
            font-size: 13px;
            white-space: nowrap;
            display: inline-block;
            margin: 1px 2px;
            word-break: keep-all;
        }
        
        .highlight-good {
            color: #2e7d32;
            font-weight: 500;
            word-break: keep-all;
        }
        
        .highlight-bad {
            color: #c62828;
            font-weight: 500;
            word-break: keep-all;
        }
        
        .work-type {
            font-weight: 600;
            color: #1976d2;
        }
        
        .sub-text {
            color: #666;
            font-size: 13px;
            margin-top: 2px;
            font-style: italic;
        }
        
        /* 코드 블록들이 여러 줄일 때 적절한 간격 */
        .code-block {
            line-height: 1.8;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;table class=&amp;quot;comparison-table&amp;quot;&amp;gt;
        &amp;lt;thead&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;th&amp;gt;비교 항목&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;스레드&amp;lt;br&amp;gt;(Thread)&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;프로세스&amp;lt;br&amp;gt;(Process)&amp;lt;/th&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/thead&amp;gt;
        &amp;lt;tbody&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;feature-name&amp;quot;&amp;gt;사용&amp;lt;br&amp;gt;자원&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;메모리 공유&amp;lt;div class=&amp;quot;sub-text&amp;quot;&amp;gt;같은 프로세스 내에서 메모리 영역을 공유&amp;lt;/div&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;별도 메모리&amp;lt;div class=&amp;quot;sub-text&amp;quot;&amp;gt;각 프로세스마다 독립적인 메모리 공간&amp;lt;/div&amp;gt;&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;feature-name&amp;quot;&amp;gt;적합한&amp;lt;br&amp;gt;작업&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;work-type&amp;quot;&amp;gt;I/O-bound&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;div class=&amp;quot;sub-text&amp;quot;&amp;gt;웹 요청, 파일 입출력, 데이터베이스 조회 등&amp;lt;/div&amp;gt;
                &amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;work-type&amp;quot;&amp;gt;CPU-bound&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;div class=&amp;quot;sub-text&amp;quot;&amp;gt;수치 계산, 이미지 처리, 데이터 분석 등&amp;lt;/div&amp;gt;
                &amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;feature-name&amp;quot;&amp;gt;대표&amp;lt;br&amp;gt;패키지&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;code-block&amp;quot;&amp;gt;
                    &amp;lt;span class=&amp;quot;code&amp;quot;&amp;gt;threading&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;code&amp;quot;&amp;gt;ThreadPoolExecutor&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;code-block&amp;quot;&amp;gt;
                    &amp;lt;span class=&amp;quot;code&amp;quot;&amp;gt;multiprocessing&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;code&amp;quot;&amp;gt;ProcessPoolExecutor&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;feature-name&amp;quot;&amp;gt;장점&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; 가볍고 생성 속도가 빠름&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; 메모리 공유로 데이터 전달 효율적&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; 컨텍스트 스위칭 비용이 낮음&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; GIL 제약 없이 진정한 병렬 처리&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; CPU 집약적 작업에 최적화&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-good&amp;quot;&amp;gt;&amp;bull; 한 프로세스 오류가 다른 프로세스에 영향 없음&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;feature-name&amp;quot;&amp;gt;단점&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-bad&amp;quot;&amp;gt;&amp;bull; GIL로 인한 CPU-bound 작업 제약&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-bad&amp;quot;&amp;gt;&amp;bull; 공유 자원 접근 시 동기화 문제&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-bad&amp;quot;&amp;gt;&amp;bull; 프로세스 생성/종료 오버헤드 큼&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-bad&amp;quot;&amp;gt;&amp;bull; IPC 비용과 메모리 사용량 증가&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt;
                    &amp;lt;span class=&amp;quot;highlight-bad&amp;quot;&amp;gt;&amp;bull; 데이터 공유가 복잡함&amp;lt;/span&amp;gt;
                &amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/tbody&amp;gt;
    &amp;lt;/table&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        .comparison-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .comparison-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .comparison-table th:first-child {
            width: 15%;
        }
        
        .comparison-table th:nth-child(2),
        .comparison-table th:nth-child(3) {
            width: 42.5%;
        }
        
        .comparison-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .comparison-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .comparison-table td {
            border: 1px solid #ddd;
            padding: 16px 20px;
            vertical-align: middle;
            line-height: 1.6;
            text-align: center;
        }
        
        .feature-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
            vertical-align: middle;
        }
        
        .code {
            font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
            background-color: #e8f4fd;
            padding: 2px 5px;
            border-radius: 3px;
            color: #1976d2;
            font-size: 13px;
            white-space: nowrap;
            display: inline-block;
            margin: 1px 2px;
            word-break: keep-all;
        }
        
        .highlight-good {
            color: #2e7d32;
            font-weight: 500;
            word-break: keep-all;
        }
        
        .highlight-bad {
            color: #c62828;
            font-weight: 500;
            word-break: keep-all;
        }
        
        .work-type {
            font-weight: 600;
            color: #1976d2;
        }
        
        .sub-text {
            color: #666;
            font-size: 13px;
            margin-top: 2px;
            font-style: italic;
        }
        
        /* 코드 블록들이 여러 줄일 때 적절한 간격 */
        .code-block {
            line-height: 1.8;
        }
    &lt;/style&gt;
&lt;table class=&quot;comparison-table&quot;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;비교 항목&lt;/th&gt;
&lt;th&gt;스레드&lt;br /&gt;(Thread)&lt;/th&gt;
&lt;th&gt;프로세스&lt;br /&gt;(Process)&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;feature-name&quot;&gt;사용&lt;br /&gt;자원&lt;/td&gt;
&lt;td&gt;메모리 공유
&lt;div class=&quot;sub-text&quot;&gt;같은 프로세스 내에서 메모리 영역을 공유&lt;/div&gt;
&lt;/td&gt;
&lt;td&gt;별도 메모리
&lt;div class=&quot;sub-text&quot;&gt;각 프로세스마다 독립적인 메모리 공간&lt;/div&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;feature-name&quot;&gt;적합한&lt;br /&gt;작업&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;work-type&quot;&gt;I/O-bound&lt;/span&gt;&lt;br /&gt;
&lt;div class=&quot;sub-text&quot;&gt;웹 요청, 파일 입출력, 데이터베이스 조회 등&lt;/div&gt;
&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;work-type&quot;&gt;CPU-bound&lt;/span&gt;&lt;br /&gt;
&lt;div class=&quot;sub-text&quot;&gt;수치 계산, 이미지 처리, 데이터 분석 등&lt;/div&gt;
&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;feature-name&quot;&gt;대표&lt;br /&gt;패키지&lt;/td&gt;
&lt;td class=&quot;code-block&quot;&gt;&lt;span class=&quot;code&quot;&gt;threading&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;code&quot;&gt;ThreadPoolExecutor&lt;/span&gt;&lt;/td&gt;
&lt;td class=&quot;code-block&quot;&gt;&lt;span class=&quot;code&quot;&gt;multiprocessing&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;code&quot;&gt;ProcessPoolExecutor&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;feature-name&quot;&gt;장점&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; 가볍고 생성 속도가 빠름&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; 메모리 공유로 데이터 전달 효율적&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; 컨텍스트 스위칭 비용이 낮음&lt;/span&gt;&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; GIL 제약 없이 진정한 병렬 처리&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; CPU 집약적 작업에 최적화&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-good&quot;&gt;&amp;bull; 한 프로세스 오류가 다른 프로세스에 영향 없음&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;feature-name&quot;&gt;단점&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;highlight-bad&quot;&gt;&amp;bull; GIL로 인한 CPU-bound 작업 제약&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-bad&quot;&gt;&amp;bull; 공유 자원 접근 시 동기화 문제&lt;/span&gt;&lt;/td&gt;
&lt;td&gt;&lt;span class=&quot;highlight-bad&quot;&gt;&amp;bull; 프로세스 생성/종료 오버헤드 큼&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-bad&quot;&gt;&amp;bull; IPC 비용과 메모리 사용량 증가&lt;/span&gt;&lt;br /&gt;&lt;span class=&quot;highlight-bad&quot;&gt;&amp;bull; 데이터 공유가 복잡함&lt;/span&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  사용 예시&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;googlenewsdecoder + ThreadPoolExecutor를 이용한 URL 병렬 처리의 기본 예시&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt; URL을 병렬로 요청하는 실행 흐름:&amp;nbsp;&lt;/b&gt;&lt;/span&gt;&lt;/p&gt;
&lt;div id=&quot;code_1748142272727&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div id=&amp;quot;gnews-options&amp;quot; style=&amp;quot;margin-top: 0px; padding-top: 0;&amp;quot;&amp;gt;
  &amp;lt;div style=&amp;quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&amp;quot;&amp;gt;
    &amp;bull; &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;ThreadPoolExecutor&amp;lt;/span&amp;gt;: max_workers만큼 작업을 동시 수행&amp;lt;br&amp;gt;
    &amp;bull; &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;executor.submit()&amp;lt;/span&amp;gt; + &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;as_completed()&amp;lt;/span&amp;gt;로 완료된 작업부터 즉시 결과 처리&amp;lt;br&amp;gt;
    &amp;bull; &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;submit()&amp;lt;/span&amp;gt;: 각 작업을 비동기적으로 실행되도록 등록하는 함수. 반환값은 Future 객체&amp;lt;br&amp;gt;
    &amp;bull; &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;as_completed()&amp;lt;/span&amp;gt;: 등록된 작업들의 Future 객체를 완료된 순서대로 넘겨주는 반복자 (결과는 future.result()로 얻음)
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div id=&quot;gnews-options&quot; style=&quot;margin-top: 0px; padding-top: 0;&quot;&gt;
&lt;div style=&quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&quot;&gt;&amp;bull; &lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;ThreadPoolExecutor&lt;/span&gt;: max_workers만큼 작업을 동시 수행&lt;br /&gt;&amp;bull; &lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;executor.submit()&lt;/span&gt; + &lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;as_completed()&lt;/span&gt;로 완료된 작업부터 즉시 결과 처리&lt;br /&gt;&amp;bull; &lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;submit()&lt;/span&gt;: 각 작업을 비동기적으로 실행되도록 등록하는 함수. 반환값은 Future 객체&lt;br /&gt;&amp;bull; &lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;as_completed()&lt;/span&gt;: 등록된 작업들의 Future 객체를 완료된 순서대로 넘겨주는 반복자 (결과는 future.result()로 얻음)&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;import concurrent.futures
from googlenewsdecoder import gnewsdecoder

# 구글 뉴스에서 수집한 URL 리스트
encoded_urls = [
   &quot;https://news.google.com/rss/articles/CBMiYWh0dHBzOi8v...&quot;,
   &quot;https://news.google.com/rss/articles/CBMiZGh0dHBzOi8v...&quot;,
   &quot;https://news.google.com/rss/articles/CBMiY2h0dHBzOi8v...&quot;,
]

# 5개 스레드로 병렬 처리
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
   # 각 URL을 병렬로 처리하도록 작업 제출
   futures = []
   for url in encoded_urls:
       future = executor.submit(gnewsdecoder, url)
       futures.append(future)
   
   # 완료된 순서대로 결과 출력
   for future in concurrent.futures.as_completed(futures):
       result = future.result()
       print(result)&lt;/code&gt;&lt;/pre&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;googlenewsdecoder와 concurrent.futures의 통합하기&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;googlenewsdecoder는 URL 요청으로 이루어진 I/O 바운드 작업이므로 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;멀티스레드&lt;/b&gt;&lt;/span&gt;를 이용한 병렬화 효과가 뛰어나다. 병렬 처리를 반영한 최종 코드는 다음과 같다. 전체 코드를 복사해서 아래와 같이 사용하면 된다:&lt;/p&gt;
&lt;pre class=&quot;bash&quot; data-ke-language=&quot;bash&quot;&gt;&lt;code&gt;python script_name.py urls.csv --output_dir ./results --interval 2 --batch_size 20 --workers 5&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747830992816&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div id=&amp;quot;gnews-options&amp;quot; style=&amp;quot;margin-top: 0px; padding-top: 0;&amp;quot;&amp;gt;
  &amp;lt;div style=&amp;quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&amp;quot;&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;input_file_path&amp;lt;/span&amp;gt;: URL이 포함된 파일 경로 (CSV 또는 텍스트 파일)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--output_dir, -o&amp;lt;/span&amp;gt;: 결과 저장 디렉토리 (기본값: 입력 파일과 동일 위치)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--interval, -i&amp;lt;/span&amp;gt;: URL 처리 사이의 대기 시간(초) (기본값: 1)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--batch_size, -b&amp;lt;/span&amp;gt;: 중간 결과 저장 단위(URL 개수) (기본값: 10)&amp;lt;br&amp;gt;
    &amp;lt;span style=&amp;quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&amp;quot;&amp;gt;--workers, -w&amp;lt;/span&amp;gt;: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div id=&quot;gnews-options&quot; style=&quot;margin-top: 0px; padding-top: 0;&quot;&gt;
&lt;div style=&quot;font-size: 0.95em; line-height: 1.7; white-space: pre-wrap; margin-top: 0; padding-top: 0;&quot;&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;input_file_path&lt;/span&gt;: URL이 포함된 파일 경로 (CSV 또는 텍스트 파일)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--output_dir, -o&lt;/span&gt;: 결과 저장 디렉토리 (기본값: 입력 파일과 동일 위치)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--interval, -i&lt;/span&gt;: URL 처리 사이의 대기 시간(초) (기본값: 1)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--batch_size, -b&lt;/span&gt;: 중간 결과 저장 단위(URL 개수) (기본값: 10)&lt;br /&gt;&lt;span style=&quot;background-color: #f5f5f5; color: #333; font-family: Consolas, 'Courier New', monospace; font-size: 0.95em; padding: 2px 6px; border-radius: 4px; border: 1px solid #ccc;&quot;&gt;--workers, -w&lt;/span&gt;: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  전체 파이썬 스크립트는 다음과 같다.&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;(펼치기 버튼을 누르면 복사하기 버튼이 나타남)&lt;/p&gt;
&lt;div id=&quot;code_1748096808026&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;pre class=&amp;quot;code-ct&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; googlenewsdecoder &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; gnewsdecoder
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; pandas &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; pd
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; os
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; time
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; argparse
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; concurrent.futures
&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; datetime &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; datetime, timedelta

&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; process_urls_from_file(input_file_path, output_dir=&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;, interval_time=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;, batch_size=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt;, workers=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;):
    &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;
    파일에서 URL을 읽고 처리하는 함수
    
    Args:
        input_file_path: URL이 포함된 파일 경로
        output_dir: 결과 저장 디렉토리 (기본값: 입력 파일과 동일)
        interval_time: URL 처리 사이의 대기 시간 (초)
        batch_size: 중간 결과 저장 단위 (URL 개수)
        workers: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)
        
    Returns:
        처리 결과가 포함된 DataFrame 또는 오류 시 None
    &amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Read URLs from file&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 파일 확장자 확인&amp;lt;/span&amp;gt;
        file_ext = os.path.splitext(input_file_path)[&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;].lower()
        
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; file_ext == &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'.csv'&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# CSV 파일인 경우 pandas로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                df = pd.read_csv(input_file_path, encoding=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;)
                
                &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 'link' 컬럼이 있는지 확인&amp;lt;/span&amp;gt;
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; df.columns:
                    urls = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;].dropna().tolist()
                    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in 'link' column&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    available_columns = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;', '&amp;lt;/span&amp;gt;.join(df.columns)
                    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&amp;quot;&amp;lt;/span&amp;gt;)
                    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Error parsing CSV file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;open&amp;lt;/span&amp;gt;(input_file_path, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'r'&amp;lt;/span&amp;gt;, encoding=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; file:
                urls = [line.strip() &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; line &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; file &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; line.strip()]
                &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in text file&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Error reading file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;

    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 파일 저장 기본 설정&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; output_dir &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        output_dir = os.path.dirname(input_file_path) &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;
    
    timestamp = datetime.now().strftime(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;%Y-%m-%d_%H%M%S&amp;quot;&amp;lt;/span&amp;gt;)
    input_filename = os.path.basename(input_file_path).split(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;)[&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;]
    output_filename = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;{input_filename}_decoded_{timestamp}.csv&amp;quot;&amp;lt;/span&amp;gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Results will be saved to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Process each URL&amp;lt;/span&amp;gt;
    results = []
    total_urls = &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(urls)
    
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Starting to process {total_urls} URLs...&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Workers: {workers} (Parallel processing: {'Enabled' if workers &amp;gt; 1 else 'Disabled'})&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Record overall process start time&amp;lt;/span&amp;gt;
    process_start_time = time.time()
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 병렬 처리 사용 여부에 따라 처리 방식 결정&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; workers &amp;gt; &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 병렬 처리 사용&amp;lt;/span&amp;gt;
        completed = &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;
        
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; batch_start &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;range&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;, total_urls, batch_size):
            batch_end = &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;min&amp;lt;/span&amp;gt;(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            batch_size_actual = &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(batch_urls)
            
            &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Processing URLs {batch_start+1}-{batch_end} of {total_urls}...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 병렬 처리 실행&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; concurrent.futures.ThreadPoolExecutor(max_workers=workers) &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; executor:
                &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# URL을 병렬로 처리&amp;lt;/span&amp;gt;
                future_to_url = {executor.submit(process_url, url, interval_time): url &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; url &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; batch_urls}
                
                &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 결과 수집&amp;lt;/span&amp;gt;
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; future &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; concurrent.futures.as_completed(future_to_url):
                    result = future.result()
                    results.append(result)
                    completed += &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;
                    
                    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 진행 상황 표시 (10개마다 또는 배치 완료 시)&amp;lt;/span&amp;gt;
                    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; completed % &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt; == &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; completed == total_urls:
                        &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Processed {completed}/{total_urls} URLs...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 현재 배치의 결과를 CSV 파일로 저장&amp;lt;/span&amp;gt;
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;, encoding=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;)
            &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Saved {len(results)} results to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 직렬 처리 사용 (기존 방식)&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; i, url &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;enumerate&amp;lt;/span&amp;gt;(urls):
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; i % &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt; == &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;:
                &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Processing URL {i+1}/{total_urls}...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 단일 URL 처리&amp;lt;/span&amp;gt;
            result = process_url(url, interval_time)
            results.append(result)
            
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# batch_size마다 중간 결과 저장&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; (i + &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;) % batch_size == &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;:
                current_df = pd.DataFrame(results)
                current_df.to_csv(output_path, index=&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;, encoding=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;)
                &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Saved {len(results)} results to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 루프 종료 후 전체 처리 시간 계산&amp;lt;/span&amp;gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Create DataFrame with results&amp;lt;/span&amp;gt;
    df = pd.DataFrame(results)

    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Calculate statistics&amp;lt;/span&amp;gt;
    df = pd.DataFrame(results)
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Calculate average processing time&amp;lt;/span&amp;gt;
    avg_time_all = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean()
    avg_time_google_only = df[df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;][&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean() &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;any&amp;lt;/span&amp;gt;(df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 요약 통계&amp;lt;/span&amp;gt;
    success_count = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'SUCCESS'&amp;lt;/span&amp;gt;).&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    failed_count = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'FAILED'&amp;lt;/span&amp;gt;).&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    error_count = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'ERROR'&amp;lt;/span&amp;gt;).&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;() &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'ERROR'&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].values &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;
    skipped_count = df[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;).&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 총 오류 수 (실패 + 기타 오류)&amp;lt;/span&amp;gt;
    total_errors = failed_count + error_count
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Format time durations for display&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; format_time(seconds):
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; seconds &amp;lt; &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;60&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;{seconds:.2f} seconds&amp;quot;&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(timedelta(seconds=&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(seconds)))
    
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;\n====== SUMMARY ======&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Total URLs processed: {total_urls}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Workers used: {workers}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; error_count &amp;gt; &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Unexpected errors: {error_count} ({error_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Total errors: {total_errors} ({total_errors/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;\n----- TIMING INFORMATION -----&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Total processing time: {format_time(total_process_time)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Average processing time per URL: {format_time(avg_time_all)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;=====================&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Add summary information to the DataFrame as metadata&amp;lt;/span&amp;gt;
    df.attrs[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'total_process_time'&amp;lt;/span&amp;gt;] = total_process_time
    df.attrs[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'avg_processing_time'&amp;lt;/span&amp;gt;] = avg_time_all
    df.attrs[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'avg_google_url_time'&amp;lt;/span&amp;gt;] = avg_time_google_only
    df.attrs[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'workers_used'&amp;lt;/span&amp;gt;] = workers
    
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; df

&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; process_url(url, interval_time=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;):
    &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;단일 URL을 처리하고 결과를 반환하는 함수&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    url_start_time = time.time()
    
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;news.google.com&amp;quot;&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; url:
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; decoded_result.&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;get&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;):
                    status = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;SUCCESS&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = decoded_result[&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;]
                    error_message = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
                &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    status = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                    error_message = decoded_result.&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;get&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;message&amp;quot;&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;Unknown error&amp;quot;&amp;lt;/span&amp;gt;)
            &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                status = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                decoded_url = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                error_message = &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(e)
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# For non-Google News URLs&amp;lt;/span&amp;gt;
            status = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;SKIPPED&amp;quot;&amp;lt;/span&amp;gt;
            decoded_url = url  &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Keep the same URL&amp;lt;/span&amp;gt;
            error_message = &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;Not a Google News URL&amp;quot;&amp;lt;/span&amp;gt;
        
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Calculate processing time for this URL&amp;lt;/span&amp;gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Return result as dictionary&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; {
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;original_url&amp;quot;&amp;lt;/span&amp;gt;: url,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;: decoded_url,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;: status,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;error_message&amp;quot;&amp;lt;/span&amp;gt;: error_message,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;processing_time_sec&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(processing_time, &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;3&amp;lt;/span&amp;gt;)
        }
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# Handle any unexpected errors&amp;lt;/span&amp;gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; {
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;original_url&amp;quot;&amp;lt;/span&amp;gt;: url,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;ERROR&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;error_message&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Unexpected error: {str(e)}&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;processing_time_sec&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(processing_time, &amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;3&amp;lt;/span&amp;gt;)
        }

&amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&amp;lt;/span&amp;gt;
    parser = argparse.ArgumentParser(description=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Google News URL decoder'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'input_file_path'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Path to the file containing URLs to decode'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'--output_dir'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'-o'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Output directory for results (default: same as input file)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'--interval'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'-i'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Interval time between requests (default: 1)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'--batch_size'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'-b'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Batch size for saving interim results (default: 10)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'--workers'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'-w'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=&amp;lt;span style=&amp;quot;color:#986801;&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;'Number of worker threads for parallel processing (default: 1)'&amp;lt;/span&amp;gt;)
    
    args = parser.parse_args()
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 인자 출력&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Processing URLs from: {args.input_file_path}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Output directory: {args.output_dir or 'Same as input file'}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Interval time: {args.interval}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Batch size: {args.batch_size}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Workers: {args.workers} (Parallel: {'Enabled' if args.workers &amp;gt; 1 else 'Disabled'})&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 함수 실행&amp;lt;/span&amp;gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size,
        args.workers
    )
    
    &amp;lt;span style=&amp;quot;color:#a0a1a7;font-style:italic;&amp;quot;&amp;gt;# 결과 반환&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; result &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Process completed successfully. Results saved to CSV file.&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color:#a626a4;font-weight:600;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color:#4078f2;&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span style=&amp;quot;color:#50a14f;&amp;quot;&amp;gt;f&amp;quot;Process failed. Please check error messages above.&amp;quot;&amp;lt;/span&amp;gt;)&amp;lt;/pre&amp;gt;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;Python Style Code Box&amp;lt;/title&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;!-- ▣ 스타일 --&amp;gt;
    &amp;lt;style&amp;gt;
    .code-box{border:1px solid #e5e7eb;border-radius:12px;margin:20px 0;background:#fff;
      font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',system-ui,sans-serif;overflow:hidden;
      box-shadow:0 1px 3px rgba(0,0,0,.1)}
    .code-hd{background:#374151;color:#fff;padding:16px 20px;font-weight:600;font-size:16px;
      display:flex;justify-content:space-between;align-items:center;cursor:pointer}
    .code-ct{display:none;padding:24px;background:#1e1e1e;font-size:14px;
      font-family:'Fira Code','D2Coding','Nanum Gothic Coding','Source Code Pro','Courier New',Consolas,Monaco;
      white-space:pre !important;overflow:auto !important;border-top:1px solid #e5e7eb;color:#d4d4d4;line-height:1.6;
      max-height:500px;word-wrap:normal !important;word-break:normal !important}
    .code-act{             /* &amp;larr; 수정: flex + 오른쪽 정렬 */
      display:none;        /* JS에서 'flex'로 켜 줌 */
      justify-content:flex-end;
      align-items:center;
      padding:16px 20px;background:#f8fafc;border-top:1px solid #e5e7eb}
    .copy-btn{background:linear-gradient(135deg,#10b981 0%,#059669 100%);color:#fff;border:0;
      padding:10px 16px;border-radius:8px;font-size:14px;font-weight:500;cursor:pointer;
      display:inline-flex;align-items:center;gap:6px}
    .copy-btn:hover{background:linear-gradient(135deg,#059669 0%,#047857 100%)}
    .toggle-btn{background:rgba(255,255,255,.2);border:none;color:#fff;padding:8px 12px;border-radius:6px;
      font-size:14px;cursor:pointer;font-weight:500}
    
    /* Python 구문 강조 */
    .keyword{color:#569cd6;font-weight:600}
    .builtin{color:#dcdcaa}
    .string{color:#ce9178}
    &amp;lt;/style&amp;gt;
    &amp;lt;!-- ▣ 코드 박스 --&amp;gt;
    &amp;lt;div class=&amp;quot;code-box&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;code-hd&amp;quot;&amp;gt;
          예시 Python 스크립트
        &amp;lt;span class=&amp;quot;toggle-btn&amp;quot;&amp;gt;  펼치기&amp;lt;/span&amp;gt;
      &amp;lt;/div&amp;gt;
      &amp;lt;pre class=&amp;quot;code-ct&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; googlenewsdecoder &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; gnewsdecoder
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; pandas &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; pd
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; os
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; time
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; argparse
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; concurrent.futures
&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; datetime &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; datetime, timedelta

&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; process_urls_from_file(input_file_path, output_dir=&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;, interval_time=1, batch_size=10, workers=1):
    &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;
    파일에서 URL을 읽고 처리하는 함수
    
    Args:
        input_file_path: URL이 포함된 파일 경로
        output_dir: 결과 저장 디렉토리 (기본값: 입력 파일과 동일)
        interval_time: URL 처리 사이의 대기 시간 (초)
        batch_size: 중간 결과 저장 단위 (URL 개수)
        workers: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)
        
    Returns:
        처리 결과가 포함된 DataFrame 또는 오류 시 None
    &amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Read URLs from file&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 파일 확장자 확인&amp;lt;/span&amp;gt;
        file_ext = os.path.splitext(input_file_path)[1].lower()
        
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; file_ext == &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'.csv'&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# CSV 파일인 경우 pandas로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                df = pd.read_csv(input_file_path, encoding=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;)
                
                &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 'link' 컬럼이 있는지 확인&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; df.columns:
                    urls = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;].dropna().tolist()
                    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in 'link' column&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    available_columns = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;', '&amp;lt;/span&amp;gt;.join(df.columns)
                    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&amp;quot;&amp;lt;/span&amp;gt;)
                    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Error parsing CSV file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;open&amp;lt;/span&amp;gt;(input_file_path, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'r'&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; file:
                urls = [line.strip() &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; line &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; file &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; line.strip()]
                &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in text file&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Error reading file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;

    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 파일 저장 기본 설정&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; output_dir &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        output_dir = os.path.dirname(input_file_path) &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;
    
    timestamp = datetime.now().strftime(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;%Y-%m-%d_%H%M%S&amp;quot;&amp;lt;/span&amp;gt;)
    input_filename = os.path.basename(input_file_path).split(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;)[0]
    output_filename = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;{input_filename}_decoded_{timestamp}.csv&amp;quot;&amp;lt;/span&amp;gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Results will be saved to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Process each URL&amp;lt;/span&amp;gt;
    results = []
    total_urls = &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(urls)
    
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Starting to process {total_urls} URLs...&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Workers: {workers} (Parallel processing: {'Enabled' if workers &amp;gt; 1 else 'Disabled'})&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Record overall process start time&amp;lt;/span&amp;gt;
    process_start_time = time.time()
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 병렬 처리 사용 여부에 따라 처리 방식 결정&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; workers &amp;gt; 1:
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 병렬 처리 사용&amp;lt;/span&amp;gt;
        completed = 0
        
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; batch_start &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;range&amp;lt;/span&amp;gt;(0, total_urls, batch_size):
            batch_end = &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;min&amp;lt;/span&amp;gt;(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            batch_size_actual = &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(batch_urls)
            
            &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Processing URLs {batch_start+1}-{batch_end} of {total_urls}...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 병렬 처리 실행&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; concurrent.futures.ThreadPoolExecutor(max_workers=workers) &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; executor:
                &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# URL을 병렬로 처리&amp;lt;/span&amp;gt;
                future_to_url = {executor.submit(process_url, url, interval_time): url &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; url &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; batch_urls}
                
                &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 결과 수집&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; future &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; concurrent.futures.as_completed(future_to_url):
                    result = future.result()
                    results.append(result)
                    completed += 1
                    
                    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 진행 상황 표시 (10개마다 또는 배치 완료 시)&amp;lt;/span&amp;gt;
                    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; completed % 10 == 0 &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; completed == total_urls:
                        &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Processed {completed}/{total_urls} URLs...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 현재 배치의 결과를 CSV 파일로 저장&amp;lt;/span&amp;gt;
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;)
            &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Saved {len(results)} results to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 직렬 처리 사용 (기존 방식)&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; i, url &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;enumerate&amp;lt;/span&amp;gt;(urls):
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; i % 10 == 0 &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - 1:
                &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Processing URL {i+1}/{total_urls}...&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 단일 URL 처리&amp;lt;/span&amp;gt;
            result = process_url(url, interval_time)
            results.append(result)
            
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# batch_size마다 중간 결과 저장&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; (i + 1) % batch_size == 0 &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - 1:
                current_df = pd.DataFrame(results)
                current_df.to_csv(output_path, index=&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Saved {len(results)} results to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 루프 종료 후 전체 처리 시간 계산&amp;lt;/span&amp;gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Create DataFrame with results&amp;lt;/span&amp;gt;
    df = pd.DataFrame(results)

    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Calculate statistics&amp;lt;/span&amp;gt;
    df = pd.DataFrame(results)
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Calculate average processing time&amp;lt;/span&amp;gt;
    avg_time_all = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean()
    avg_time_google_only = df[df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;][&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean() &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;any&amp;lt;/span&amp;gt;(df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; 0
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 요약 통계&amp;lt;/span&amp;gt;
    success_count = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'SUCCESS'&amp;lt;/span&amp;gt;).&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    failed_count = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'FAILED'&amp;lt;/span&amp;gt;).&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    error_count = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'ERROR'&amp;lt;/span&amp;gt;).&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;() &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'ERROR'&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].values &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; 0
    skipped_count = df[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;).&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;sum&amp;lt;/span&amp;gt;()
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 총 오류 수 (실패 + 기타 오류)&amp;lt;/span&amp;gt;
    total_errors = failed_count + error_count
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Format time durations for display&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; format_time(seconds):
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; seconds &amp;lt; 60:
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;{seconds:.2f} seconds&amp;quot;&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(timedelta(seconds=&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(seconds)))
    
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;\n====== SUMMARY ======&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Total URLs processed: {total_urls}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Workers used: {workers}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; error_count &amp;gt; 0:
        &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Unexpected errors: {error_count} ({error_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Total errors: {total_errors} ({total_errors/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;\n----- TIMING INFORMATION -----&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Total processing time: {format_time(total_process_time)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Average processing time per URL: {format_time(avg_time_all)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;=====================&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Add summary information to the DataFrame as metadata&amp;lt;/span&amp;gt;
    df.attrs[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'total_process_time'&amp;lt;/span&amp;gt;] = total_process_time
    df.attrs[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'avg_processing_time'&amp;lt;/span&amp;gt;] = avg_time_all
    df.attrs[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'avg_google_url_time'&amp;lt;/span&amp;gt;] = avg_time_google_only
    df.attrs[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'workers_used'&amp;lt;/span&amp;gt;] = workers
    
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; df

&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; process_url(url, interval_time=1):
    &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;단일 URL을 처리하고 결과를 반환하는 함수&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    url_start_time = time.time()
    
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;news.google.com&amp;quot;&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; url:
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; decoded_result.&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;get&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;):
                    status = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;SUCCESS&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = decoded_result[&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;]
                    error_message = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    status = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                    error_message = decoded_result.&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;get&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;message&amp;quot;&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;Unknown error&amp;quot;&amp;lt;/span&amp;gt;)
            &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                status = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                decoded_url = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                error_message = &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(e)
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# For non-Google News URLs&amp;lt;/span&amp;gt;
            status = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;SKIPPED&amp;quot;&amp;lt;/span&amp;gt;
            decoded_url = url  &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Keep the same URL&amp;lt;/span&amp;gt;
            error_message = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;Not a Google News URL&amp;quot;&amp;lt;/span&amp;gt;
        
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Calculate processing time for this URL&amp;lt;/span&amp;gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Return result as dictionary&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; {
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;original_url&amp;quot;&amp;lt;/span&amp;gt;: url,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;: decoded_url,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;: status,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;error_message&amp;quot;&amp;lt;/span&amp;gt;: error_message,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;processing_time_sec&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(processing_time, 3)
        }
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# Handle any unexpected errors&amp;lt;/span&amp;gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; {
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;original_url&amp;quot;&amp;lt;/span&amp;gt;: url,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;ERROR&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;error_message&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Unexpected error: {str(e)}&amp;quot;&amp;lt;/span&amp;gt;,
            &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;processing_time_sec&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(processing_time, 3)
        }

&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&amp;lt;/span&amp;gt;
    parser = argparse.ArgumentParser(description=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Google News URL decoder'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'input_file_path'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Path to the file containing URLs to decode'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'--output_dir'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'-o'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Output directory for results (default: same as input file)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'--interval'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'-i'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=1, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Interval time between requests (default: 1)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'--batch_size'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'-b'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=10, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Batch size for saving interim results (default: 10)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'--workers'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'-w'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;type&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=1, &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;help&amp;lt;/span&amp;gt;=&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;'Number of worker threads for parallel processing (default: 1)'&amp;lt;/span&amp;gt;)
    
    args = parser.parse_args()
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 인자 출력&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Processing URLs from: {args.input_file_path}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Output directory: {args.output_dir or 'Same as input file'}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Interval time: {args.interval}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Batch size: {args.batch_size}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Workers: {args.workers} (Parallel: {'Enabled' if args.workers &amp;gt; 1 else 'Disabled'})&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 함수 실행&amp;lt;/span&amp;gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size,
        args.workers
    )
    
    &amp;lt;span class=&amp;quot;comment&amp;quot;&amp;gt;# 결과 반환&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; result &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Process completed successfully. Results saved to CSV file.&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;builtin&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;f&amp;quot;Process failed. Please check error messages above.&amp;quot;&amp;lt;/span&amp;gt;)&amp;lt;/pre&amp;gt;
      &amp;lt;div class=&amp;quot;code-act&amp;quot;&amp;gt;
        &amp;lt;button class=&amp;quot;copy-btn&amp;quot;&amp;gt;  코드 복사&amp;lt;/button&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
    &amp;lt;script&amp;gt;
    //===============================================
    //  토글 + 복사 (오른쪽 정렬 버전) &amp;ndash; 2025-05-24
    //===============================================
    document.addEventListener('DOMContentLoaded',()=&amp;gt;{
      // 토글
      document.body.addEventListener('click',e=&amp;gt;{
        if(!e.target.classList.contains('toggle-btn')) return;
        const box  = e.target.closest('.code-box');
        const ct   = box.querySelector('.code-ct');
        const act  = box.querySelector('.code-act');
        const open = ct.style.display==='block';
        ct.style.display  = open ? 'none' : 'block';
        act.style.display = open ? 'none' : 'flex';   // &amp;larr; 수정: flex로 켬
        e.target.textContent = open ? '  펼치기' : '  접기';
      });
      // 복사
      document.body.addEventListener('click',e=&amp;gt;{
        if(!e.target.classList.contains('copy-btn')) return;
        const btn  = e.target;
        const code = btn.closest('.code-box').querySelector('.code-ct').innerText;
        if(navigator.clipboard &amp;amp;&amp;amp; window.isSecureContext){
          navigator.clipboard.writeText(code).then(()=&amp;gt;flash(btn)).catch(()=&amp;gt;fallback(code,btn));
        }else{
          fallback(code,btn);
        }
      });
      function flash(btn){
        const t = btn.textContent;
        btn.textContent='✅ 복사됨!';
        setTimeout(()=&amp;gt;btn.textContent=t,2000);
      }
      function fallback(text,btn){
        const ta=document.createElement('textarea');
        ta.value=text;ta.style.position='fixed';ta.style.top='-1000px';
        document.body.appendChild(ta);ta.select();
        try{document.execCommand('copy');flash(btn);}catch(_){alert('복사 실패  ');}
        document.body.removeChild(ta);
      }
    });
    &amp;lt;/script&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;pre class=&quot;code-ct&quot;&gt;&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;from&lt;/span&gt; googlenewsdecoder &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; gnewsdecoder
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; pandas &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; pd
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; os
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; time
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; argparse
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; concurrent.futures
&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;from&lt;/span&gt; datetime &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;import&lt;/span&gt; datetime, timedelta

&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;def&lt;/span&gt; process_urls_from_file(input_file_path, output_dir=&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;, interval_time=&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;, batch_size=&lt;span style=&quot;color: #986801;&quot;&gt;10&lt;/span&gt;, workers=&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;):
    &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;&quot;&quot;
    파일에서 URL을 읽고 처리하는 함수
    
    Args:
        input_file_path: URL이 포함된 파일 경로
        output_dir: 결과 저장 디렉토리 (기본값: 입력 파일과 동일)
        interval_time: URL 처리 사이의 대기 시간 (초)
        batch_size: 중간 결과 저장 단위 (URL 개수)
        workers: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)
        
    Returns:
        처리 결과가 포함된 DataFrame 또는 오류 시 None
    &quot;&quot;&quot;&lt;/span&gt;
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Read URLs from file&lt;/span&gt;
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;try&lt;/span&gt;:
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 파일 확장자 확인&lt;/span&gt;
        file_ext = os.path.splitext(input_file_path)[&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;].lower()
        
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; file_ext == &lt;span style=&quot;color: #50a14f;&quot;&gt;'.csv'&lt;/span&gt;:
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# CSV 파일인 경우 pandas로 읽기&lt;/span&gt;
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;try&lt;/span&gt;:
                df = pd.read_csv(input_file_path, encoding=&lt;span style=&quot;color: #50a14f;&quot;&gt;'utf-8-sig'&lt;/span&gt;)
                
                &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 'link' 컬럼이 있는지 확인&lt;/span&gt;
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; &lt;span style=&quot;color: #50a14f;&quot;&gt;'link'&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; df.columns:
                    urls = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'link'&lt;/span&gt;].dropna().tolist()
                    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Found {len(urls)} URLs in 'link' column&quot;&lt;/span&gt;)
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
                    available_columns = &lt;span style=&quot;color: #50a14f;&quot;&gt;', '&lt;/span&gt;.join(df.columns)
                    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&quot;&lt;/span&gt;)
                    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;except&lt;/span&gt; Exception &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; e:
                &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Error parsing CSV file: {e}&quot;&lt;/span&gt;)
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&lt;/span&gt;
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;with&lt;/span&gt; &lt;span style=&quot;color: #4078f2;&quot;&gt;open&lt;/span&gt;(input_file_path, &lt;span style=&quot;color: #50a14f;&quot;&gt;'r'&lt;/span&gt;, encoding=&lt;span style=&quot;color: #50a14f;&quot;&gt;'utf-8-sig'&lt;/span&gt;) &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; file:
                urls = [line.strip() &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;for&lt;/span&gt; line &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; file &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; line.strip()]
                &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Found {len(urls)} URLs in text file&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;except&lt;/span&gt; Exception &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; e:
        &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Error reading file: {e}&quot;&lt;/span&gt;)
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;

    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 파일 저장 기본 설정&lt;/span&gt;
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; output_dir &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;is&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;:
        output_dir = os.path.dirname(input_file_path) &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;or&lt;/span&gt; &lt;span style=&quot;color: #50a14f;&quot;&gt;'.'&lt;/span&gt;
    
    timestamp = datetime.now().strftime(&lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;%Y-%m-%d_%H%M%S&quot;&lt;/span&gt;)
    input_filename = os.path.basename(input_file_path).split(&lt;span style=&quot;color: #50a14f;&quot;&gt;'.'&lt;/span&gt;)[&lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;]
    output_filename = &lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;{input_filename}_decoded_{timestamp}.csv&quot;&lt;/span&gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Results will be saved to: {output_path}&quot;&lt;/span&gt;)
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Process each URL&lt;/span&gt;
    results = []
    total_urls = &lt;span style=&quot;color: #4078f2;&quot;&gt;len&lt;/span&gt;(urls)
    
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Starting to process {total_urls} URLs...&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Workers: {workers} (Parallel processing: {'Enabled' if workers &amp;gt; 1 else 'Disabled'})&quot;&lt;/span&gt;)

    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Record overall process start time&lt;/span&gt;
    process_start_time = time.time()
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 병렬 처리 사용 여부에 따라 처리 방식 결정&lt;/span&gt;
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; workers &amp;gt; &lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;:
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 병렬 처리 사용&lt;/span&gt;
        completed = &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;
        
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)&lt;/span&gt;
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;for&lt;/span&gt; batch_start &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; &lt;span style=&quot;color: #4078f2;&quot;&gt;range&lt;/span&gt;(&lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;, total_urls, batch_size):
            batch_end = &lt;span style=&quot;color: #4078f2;&quot;&gt;min&lt;/span&gt;(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            batch_size_actual = &lt;span style=&quot;color: #4078f2;&quot;&gt;len&lt;/span&gt;(batch_urls)
            
            &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Processing URLs {batch_start+1}-{batch_end} of {total_urls}...&quot;&lt;/span&gt;)
            
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 병렬 처리 실행&lt;/span&gt;
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;with&lt;/span&gt; concurrent.futures.ThreadPoolExecutor(max_workers=workers) &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; executor:
                &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# URL을 병렬로 처리&lt;/span&gt;
                future_to_url = {executor.submit(process_url, url, interval_time): url &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;for&lt;/span&gt; url &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; batch_urls}
                
                &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 결과 수집&lt;/span&gt;
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;for&lt;/span&gt; future &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; concurrent.futures.as_completed(future_to_url):
                    result = future.result()
                    results.append(result)
                    completed += &lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;
                    
                    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 진행 상황 표시 (10개마다 또는 배치 완료 시)&lt;/span&gt;
                    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; completed % &lt;span style=&quot;color: #986801;&quot;&gt;10&lt;/span&gt; == &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;or&lt;/span&gt; completed == total_urls:
                        &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Processed {completed}/{total_urls} URLs...&quot;&lt;/span&gt;)
            
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 현재 배치의 결과를 CSV 파일로 저장&lt;/span&gt;
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;False&lt;/span&gt;, encoding=&lt;span style=&quot;color: #50a14f;&quot;&gt;'utf-8'&lt;/span&gt;)
            &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Saved {len(results)} results to: {output_path}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 직렬 처리 사용 (기존 방식)&lt;/span&gt;
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;for&lt;/span&gt; i, url &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; &lt;span style=&quot;color: #4078f2;&quot;&gt;enumerate&lt;/span&gt;(urls):
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; i % &lt;span style=&quot;color: #986801;&quot;&gt;10&lt;/span&gt; == &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;or&lt;/span&gt; i == total_urls - &lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;:
                &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Processing URL {i+1}/{total_urls}...&quot;&lt;/span&gt;)
            
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 단일 URL 처리&lt;/span&gt;
            result = process_url(url, interval_time)
            results.append(result)
            
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# batch_size마다 중간 결과 저장&lt;/span&gt;
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; (i + &lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;) % batch_size == &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;or&lt;/span&gt; i == total_urls - &lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;:
                current_df = pd.DataFrame(results)
                current_df.to_csv(output_path, index=&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;False&lt;/span&gt;, encoding=&lt;span style=&quot;color: #50a14f;&quot;&gt;'utf-8'&lt;/span&gt;)
                &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Saved {len(results)} results to: {output_path}&quot;&lt;/span&gt;)

    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 루프 종료 후 전체 처리 시간 계산&lt;/span&gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Create DataFrame with results&lt;/span&gt;
    df = pd.DataFrame(results)

    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Calculate statistics&lt;/span&gt;
    df = pd.DataFrame(results)
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Calculate average processing time&lt;/span&gt;
    avg_time_all = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean()
    avg_time_google_only = df[df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;] != &lt;span style=&quot;color: #50a14f;&quot;&gt;'SKIPPED'&lt;/span&gt;][&lt;span style=&quot;color: #50a14f;&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean() &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; &lt;span style=&quot;color: #4078f2;&quot;&gt;any&lt;/span&gt;(df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;] != &lt;span style=&quot;color: #50a14f;&quot;&gt;'SKIPPED'&lt;/span&gt;) &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt; &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 요약 통계&lt;/span&gt;
    success_count = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span style=&quot;color: #50a14f;&quot;&gt;'SUCCESS'&lt;/span&gt;).&lt;span style=&quot;color: #4078f2;&quot;&gt;sum&lt;/span&gt;()
    failed_count = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span style=&quot;color: #50a14f;&quot;&gt;'FAILED'&lt;/span&gt;).&lt;span style=&quot;color: #4078f2;&quot;&gt;sum&lt;/span&gt;()
    error_count = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span style=&quot;color: #50a14f;&quot;&gt;'ERROR'&lt;/span&gt;).&lt;span style=&quot;color: #4078f2;&quot;&gt;sum&lt;/span&gt;() &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; &lt;span style=&quot;color: #50a14f;&quot;&gt;'ERROR'&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;].values &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt; &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;
    skipped_count = df[&lt;span style=&quot;color: #50a14f;&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span style=&quot;color: #50a14f;&quot;&gt;'SKIPPED'&lt;/span&gt;).&lt;span style=&quot;color: #4078f2;&quot;&gt;sum&lt;/span&gt;()
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 총 오류 수 (실패 + 기타 오류)&lt;/span&gt;
    total_errors = failed_count + error_count
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Format time durations for display&lt;/span&gt;
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;def&lt;/span&gt; format_time(seconds):
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; seconds &amp;lt; &lt;span style=&quot;color: #986801;&quot;&gt;60&lt;/span&gt;:
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; &lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;{seconds:.2f} seconds&quot;&lt;/span&gt;
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; &lt;span style=&quot;color: #4078f2;&quot;&gt;str&lt;/span&gt;(timedelta(seconds=&lt;span style=&quot;color: #4078f2;&quot;&gt;round&lt;/span&gt;(seconds)))
    
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;\n====== SUMMARY ======&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Total URLs processed: {total_urls}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Workers used: {workers}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; error_count &amp;gt; &lt;span style=&quot;color: #986801;&quot;&gt;0&lt;/span&gt;:
        &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Unexpected errors: {error_count} ({error_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Total errors: {total_errors} ({total_errors/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;\n----- TIMING INFORMATION -----&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Total processing time: {format_time(total_process_time)}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Average processing time per URL: {format_time(avg_time_all)}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;=====================&quot;&lt;/span&gt;)
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Add summary information to the DataFrame as metadata&lt;/span&gt;
    df.attrs[&lt;span style=&quot;color: #50a14f;&quot;&gt;'total_process_time'&lt;/span&gt;] = total_process_time
    df.attrs[&lt;span style=&quot;color: #50a14f;&quot;&gt;'avg_processing_time'&lt;/span&gt;] = avg_time_all
    df.attrs[&lt;span style=&quot;color: #50a14f;&quot;&gt;'avg_google_url_time'&lt;/span&gt;] = avg_time_google_only
    df.attrs[&lt;span style=&quot;color: #50a14f;&quot;&gt;'workers_used'&lt;/span&gt;] = workers
    
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; df

&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;def&lt;/span&gt; process_url(url, interval_time=&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;):
    &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;&quot;&quot;단일 URL을 처리하고 결과를 반환하는 함수&quot;&quot;&quot;&lt;/span&gt;
    url_start_time = time.time()
    
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;try&lt;/span&gt;:
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;news.google.com&quot;&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;in&lt;/span&gt; url:
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;try&lt;/span&gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; decoded_result.&lt;span style=&quot;color: #4078f2;&quot;&gt;get&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;status&quot;&lt;/span&gt;):
                    status = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;SUCCESS&quot;&lt;/span&gt;
                    decoded_url = decoded_result[&lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;]
                    error_message = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;&quot;&lt;/span&gt;
                &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
                    status = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                    decoded_url = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                    error_message = decoded_result.&lt;span style=&quot;color: #4078f2;&quot;&gt;get&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;message&quot;&lt;/span&gt;, &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;Unknown error&quot;&lt;/span&gt;)
            &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;except&lt;/span&gt; Exception &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; e:
                status = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                decoded_url = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                error_message = &lt;span style=&quot;color: #4078f2;&quot;&gt;str&lt;/span&gt;(e)
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
            &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# For non-Google News URLs&lt;/span&gt;
            status = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;SKIPPED&quot;&lt;/span&gt;
            decoded_url = url  &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Keep the same URL&lt;/span&gt;
            error_message = &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;Not a Google News URL&quot;&lt;/span&gt;
        
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Calculate processing time for this URL&lt;/span&gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Return result as dictionary&lt;/span&gt;
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; {
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;original_url&quot;&lt;/span&gt;: url,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;: decoded_url,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;status&quot;&lt;/span&gt;: status,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;error_message&quot;&lt;/span&gt;: error_message,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;processing_time_sec&quot;&lt;/span&gt;: &lt;span style=&quot;color: #4078f2;&quot;&gt;round&lt;/span&gt;(processing_time, &lt;span style=&quot;color: #986801;&quot;&gt;3&lt;/span&gt;)
        }
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;except&lt;/span&gt; Exception &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;as&lt;/span&gt; e:
        &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# Handle any unexpected errors&lt;/span&gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;return&lt;/span&gt; {
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;original_url&quot;&lt;/span&gt;: url,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;: &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;N/A&quot;&lt;/span&gt;,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;status&quot;&lt;/span&gt;: &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;ERROR&quot;&lt;/span&gt;,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;error_message&quot;&lt;/span&gt;: &lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Unexpected error: {str(e)}&quot;&lt;/span&gt;,
            &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;processing_time_sec&quot;&lt;/span&gt;: &lt;span style=&quot;color: #4078f2;&quot;&gt;round&lt;/span&gt;(processing_time, &lt;span style=&quot;color: #986801;&quot;&gt;3&lt;/span&gt;)
        }

&lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; __name__ == &lt;span style=&quot;color: #50a14f;&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&lt;/span&gt;
    parser = argparse.ArgumentParser(description=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Google News URL decoder'&lt;/span&gt;)
    parser.add_argument(&lt;span style=&quot;color: #50a14f;&quot;&gt;'input_file_path'&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;help&lt;/span&gt;=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Path to the file containing URLs to decode'&lt;/span&gt;)
    parser.add_argument(&lt;span style=&quot;color: #50a14f;&quot;&gt;'--output_dir'&lt;/span&gt;, &lt;span style=&quot;color: #50a14f;&quot;&gt;'-o'&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;help&lt;/span&gt;=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Output directory for results (default: same as input file)'&lt;/span&gt;)
    parser.add_argument(&lt;span style=&quot;color: #50a14f;&quot;&gt;'--interval'&lt;/span&gt;, &lt;span style=&quot;color: #50a14f;&quot;&gt;'-i'&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;type&lt;/span&gt;=&lt;span style=&quot;color: #4078f2;&quot;&gt;int&lt;/span&gt;, default=&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;help&lt;/span&gt;=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Interval time between requests (default: 1)'&lt;/span&gt;)
    parser.add_argument(&lt;span style=&quot;color: #50a14f;&quot;&gt;'--batch_size'&lt;/span&gt;, &lt;span style=&quot;color: #50a14f;&quot;&gt;'-b'&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;type&lt;/span&gt;=&lt;span style=&quot;color: #4078f2;&quot;&gt;int&lt;/span&gt;, default=&lt;span style=&quot;color: #986801;&quot;&gt;10&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;help&lt;/span&gt;=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Batch size for saving interim results (default: 10)'&lt;/span&gt;)
    parser.add_argument(&lt;span style=&quot;color: #50a14f;&quot;&gt;'--workers'&lt;/span&gt;, &lt;span style=&quot;color: #50a14f;&quot;&gt;'-w'&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;type&lt;/span&gt;=&lt;span style=&quot;color: #4078f2;&quot;&gt;int&lt;/span&gt;, default=&lt;span style=&quot;color: #986801;&quot;&gt;1&lt;/span&gt;, &lt;span style=&quot;color: #4078f2;&quot;&gt;help&lt;/span&gt;=&lt;span style=&quot;color: #50a14f;&quot;&gt;'Number of worker threads for parallel processing (default: 1)'&lt;/span&gt;)
    
    args = parser.parse_args()
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 인자 출력&lt;/span&gt;
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Processing URLs from: {args.input_file_path}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Output directory: {args.output_dir or 'Same as input file'}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Interval time: {args.interval}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Batch size: {args.batch_size}&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Workers: {args.workers} (Parallel: {'Enabled' if args.workers &amp;gt; 1 else 'Disabled'})&quot;&lt;/span&gt;)
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 함수 실행&lt;/span&gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size,
        args.workers
    )
    
    &lt;span style=&quot;color: #a0a1a7; font-style: italic;&quot;&gt;# 결과 반환&lt;/span&gt;
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;if&lt;/span&gt; result &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;is&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;not&lt;/span&gt; &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;None&lt;/span&gt;:
        &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Process completed successfully. Results saved to CSV file.&quot;&lt;/span&gt;)
    &lt;span style=&quot;color: #a626a4; font-weight: 600;&quot;&gt;else&lt;/span&gt;:
        &lt;span style=&quot;color: #4078f2;&quot;&gt;print&lt;/span&gt;(&lt;span style=&quot;color: #50a14f;&quot;&gt;f&quot;Process failed. Please check error messages above.&quot;&lt;/span&gt;)&lt;/pre&gt;
&lt;!-- ▣ 스타일 --&gt;
&lt;style&gt;
    .code-box{border:1px solid #e5e7eb;border-radius:12px;margin:20px 0;background:#fff;
      font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',system-ui,sans-serif;overflow:hidden;
      box-shadow:0 1px 3px rgba(0,0,0,.1)}
    .code-hd{background:#374151;color:#fff;padding:16px 20px;font-weight:600;font-size:16px;
      display:flex;justify-content:space-between;align-items:center;cursor:pointer}
    .code-ct{display:none;padding:24px;background:#1e1e1e;font-size:14px;
      font-family:'Fira Code','D2Coding','Nanum Gothic Coding','Source Code Pro','Courier New',Consolas,Monaco;
      white-space:pre !important;overflow:auto !important;border-top:1px solid #e5e7eb;color:#d4d4d4;line-height:1.6;
      max-height:500px;word-wrap:normal !important;word-break:normal !important}
    .code-act{             /* ← 수정: flex + 오른쪽 정렬 */
      display:none;        /* JS에서 'flex'로 켜 줌 */
      justify-content:flex-end;
      align-items:center;
      padding:16px 20px;background:#f8fafc;border-top:1px solid #e5e7eb}
    .copy-btn{background:linear-gradient(135deg,#10b981 0%,#059669 100%);color:#fff;border:0;
      padding:10px 16px;border-radius:8px;font-size:14px;font-weight:500;cursor:pointer;
      display:inline-flex;align-items:center;gap:6px}
    .copy-btn:hover{background:linear-gradient(135deg,#059669 0%,#047857 100%)}
    .toggle-btn{background:rgba(255,255,255,.2);border:none;color:#fff;padding:8px 12px;border-radius:6px;
      font-size:14px;cursor:pointer;font-weight:500}
    
    /* Python 구문 강조 */
    .keyword{color:#569cd6;font-weight:600}
    .builtin{color:#dcdcaa}
    .string{color:#ce9178}
    .comment{color:#6a9955;font-style:italic}
    &lt;/style&gt;
&lt;!-- ▣ 코드 박스 --&gt;
&lt;div class=&quot;code-box&quot;&gt;
&lt;div class=&quot;code-hd&quot;&gt;  예시 Python 스크립트 &lt;span class=&quot;toggle-btn&quot;&gt;  펼치기&lt;/span&gt;&lt;/div&gt;
&lt;pre class=&quot;code-ct&quot;&gt;&lt;span class=&quot;keyword&quot;&gt;from&lt;/span&gt; googlenewsdecoder &lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; gnewsdecoder
&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; pandas &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; pd
&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; os
&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; time
&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; argparse
&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; concurrent.futures
&lt;span class=&quot;keyword&quot;&gt;from&lt;/span&gt; datetime &lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; datetime, timedelta

&lt;span class=&quot;keyword&quot;&gt;def&lt;/span&gt; process_urls_from_file(input_file_path, output_dir=&lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;, interval_time=1, batch_size=10, workers=1):
    &lt;span class=&quot;string&quot;&gt;&quot;&quot;&quot;
    파일에서 URL을 읽고 처리하는 함수
    
    Args:
        input_file_path: URL이 포함된 파일 경로
        output_dir: 결과 저장 디렉토리 (기본값: 입력 파일과 동일)
        interval_time: URL 처리 사이의 대기 시간 (초)
        batch_size: 중간 결과 저장 단위 (URL 개수)
        workers: 병렬 처리를 위한 작업자 수 (기본값: 1, 병렬 처리 없음)
        
    Returns:
        처리 결과가 포함된 DataFrame 또는 오류 시 None
    &quot;&quot;&quot;&lt;/span&gt;
    &lt;span class=&quot;comment&quot;&gt;# Read URLs from file&lt;/span&gt;
    &lt;span class=&quot;keyword&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;comment&quot;&gt;# 파일 확장자 확인&lt;/span&gt;
        file_ext = os.path.splitext(input_file_path)[1].lower()
        
        &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; file_ext == &lt;span class=&quot;string&quot;&gt;'.csv'&lt;/span&gt;:
            &lt;span class=&quot;comment&quot;&gt;# CSV 파일인 경우 pandas로 읽기&lt;/span&gt;
            &lt;span class=&quot;keyword&quot;&gt;try&lt;/span&gt;:
                df = pd.read_csv(input_file_path, encoding=&lt;span class=&quot;string&quot;&gt;'utf-8-sig'&lt;/span&gt;)
                
                &lt;span class=&quot;comment&quot;&gt;# 'link' 컬럼이 있는지 확인&lt;/span&gt;
                &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;string&quot;&gt;'link'&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; df.columns:
                    urls = df[&lt;span class=&quot;string&quot;&gt;'link'&lt;/span&gt;].dropna().tolist()
                    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Found {len(urls)} URLs in 'link' column&quot;&lt;/span&gt;)
                &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
                    available_columns = &lt;span class=&quot;string&quot;&gt;', '&lt;/span&gt;.join(df.columns)
                    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&quot;&lt;/span&gt;)
                    &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;
            &lt;span class=&quot;keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; e:
                &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Error parsing CSV file: {e}&quot;&lt;/span&gt;)
                &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;
        &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;comment&quot;&gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&lt;/span&gt;
            &lt;span class=&quot;keyword&quot;&gt;with&lt;/span&gt; &lt;span class=&quot;builtin&quot;&gt;open&lt;/span&gt;(input_file_path, &lt;span class=&quot;string&quot;&gt;'r'&lt;/span&gt;, encoding=&lt;span class=&quot;string&quot;&gt;'utf-8-sig'&lt;/span&gt;) &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; file:
                urls = [line.strip() &lt;span class=&quot;keyword&quot;&gt;for&lt;/span&gt; line &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; file &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; line.strip()]
                &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Found {len(urls)} URLs in text file&quot;&lt;/span&gt;)
    &lt;span class=&quot;keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Error reading file: {e}&quot;&lt;/span&gt;)
        &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;

    &lt;span class=&quot;comment&quot;&gt;# 파일 저장 기본 설정&lt;/span&gt;
    &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; output_dir &lt;span class=&quot;keyword&quot;&gt;is&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;:
        output_dir = os.path.dirname(input_file_path) &lt;span class=&quot;keyword&quot;&gt;or&lt;/span&gt; &lt;span class=&quot;string&quot;&gt;'.'&lt;/span&gt;
    
    timestamp = datetime.now().strftime(&lt;span class=&quot;string&quot;&gt;&quot;%Y-%m-%d_%H%M%S&quot;&lt;/span&gt;)
    input_filename = os.path.basename(input_file_path).split(&lt;span class=&quot;string&quot;&gt;'.'&lt;/span&gt;)[0]
    output_filename = &lt;span class=&quot;string&quot;&gt;f&quot;{input_filename}_decoded_{timestamp}.csv&quot;&lt;/span&gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Results will be saved to: {output_path}&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;comment&quot;&gt;# Process each URL&lt;/span&gt;
    results = []
    total_urls = &lt;span class=&quot;builtin&quot;&gt;len&lt;/span&gt;(urls)
    
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Starting to process {total_urls} URLs...&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Workers: {workers} (Parallel processing: {'Enabled' if workers &amp;gt; 1 else 'Disabled'})&quot;&lt;/span&gt;)

    &lt;span class=&quot;comment&quot;&gt;# Record overall process start time&lt;/span&gt;
    process_start_time = time.time()
    
    &lt;span class=&quot;comment&quot;&gt;# 병렬 처리 사용 여부에 따라 처리 방식 결정&lt;/span&gt;
    &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; workers &amp;gt; 1:
        &lt;span class=&quot;comment&quot;&gt;# 병렬 처리 사용&lt;/span&gt;
        completed = 0
        
        &lt;span class=&quot;comment&quot;&gt;# URL 배치로 나누기 (한 번에 batch_size만큼 병렬 처리)&lt;/span&gt;
        &lt;span class=&quot;keyword&quot;&gt;for&lt;/span&gt; batch_start &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; &lt;span class=&quot;builtin&quot;&gt;range&lt;/span&gt;(0, total_urls, batch_size):
            batch_end = &lt;span class=&quot;builtin&quot;&gt;min&lt;/span&gt;(batch_start + batch_size, total_urls)
            batch_urls = urls[batch_start:batch_end]
            batch_size_actual = &lt;span class=&quot;builtin&quot;&gt;len&lt;/span&gt;(batch_urls)
            
            &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Processing URLs {batch_start+1}-{batch_end} of {total_urls}...&quot;&lt;/span&gt;)
            
            &lt;span class=&quot;comment&quot;&gt;# 병렬 처리 실행&lt;/span&gt;
            &lt;span class=&quot;keyword&quot;&gt;with&lt;/span&gt; concurrent.futures.ThreadPoolExecutor(max_workers=workers) &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; executor:
                &lt;span class=&quot;comment&quot;&gt;# URL을 병렬로 처리&lt;/span&gt;
                future_to_url = {executor.submit(process_url, url, interval_time): url &lt;span class=&quot;keyword&quot;&gt;for&lt;/span&gt; url &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; batch_urls}
                
                &lt;span class=&quot;comment&quot;&gt;# 결과 수집&lt;/span&gt;
                &lt;span class=&quot;keyword&quot;&gt;for&lt;/span&gt; future &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; concurrent.futures.as_completed(future_to_url):
                    result = future.result()
                    results.append(result)
                    completed += 1
                    
                    &lt;span class=&quot;comment&quot;&gt;# 진행 상황 표시 (10개마다 또는 배치 완료 시)&lt;/span&gt;
                    &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; completed % 10 == 0 &lt;span class=&quot;keyword&quot;&gt;or&lt;/span&gt; completed == total_urls:
                        &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Processed {completed}/{total_urls} URLs...&quot;&lt;/span&gt;)
            
            &lt;span class=&quot;comment&quot;&gt;# 현재 배치의 결과를 CSV 파일로 저장&lt;/span&gt;
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&lt;span class=&quot;keyword&quot;&gt;False&lt;/span&gt;, encoding=&lt;span class=&quot;string&quot;&gt;'utf-8'&lt;/span&gt;)
            &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Saved {len(results)} results to: {output_path}&quot;&lt;/span&gt;)
    &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
        &lt;span class=&quot;comment&quot;&gt;# 직렬 처리 사용 (기존 방식)&lt;/span&gt;
        &lt;span class=&quot;keyword&quot;&gt;for&lt;/span&gt; i, url &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; &lt;span class=&quot;builtin&quot;&gt;enumerate&lt;/span&gt;(urls):
            &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; i % 10 == 0 &lt;span class=&quot;keyword&quot;&gt;or&lt;/span&gt; i == total_urls - 1:
                &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Processing URL {i+1}/{total_urls}...&quot;&lt;/span&gt;)
            
            &lt;span class=&quot;comment&quot;&gt;# 단일 URL 처리&lt;/span&gt;
            result = process_url(url, interval_time)
            results.append(result)
            
            &lt;span class=&quot;comment&quot;&gt;# batch_size마다 중간 결과 저장&lt;/span&gt;
            &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; (i + 1) % batch_size == 0 &lt;span class=&quot;keyword&quot;&gt;or&lt;/span&gt; i == total_urls - 1:
                current_df = pd.DataFrame(results)
                current_df.to_csv(output_path, index=&lt;span class=&quot;keyword&quot;&gt;False&lt;/span&gt;, encoding=&lt;span class=&quot;string&quot;&gt;'utf-8'&lt;/span&gt;)
                &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Saved {len(results)} results to: {output_path}&quot;&lt;/span&gt;)

    &lt;span class=&quot;comment&quot;&gt;# 루프 종료 후 전체 처리 시간 계산&lt;/span&gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &lt;span class=&quot;comment&quot;&gt;# Create DataFrame with results&lt;/span&gt;
    df = pd.DataFrame(results)

    &lt;span class=&quot;comment&quot;&gt;# Calculate statistics&lt;/span&gt;
    df = pd.DataFrame(results)
    
    &lt;span class=&quot;comment&quot;&gt;# Calculate average processing time&lt;/span&gt;
    avg_time_all = df[&lt;span class=&quot;string&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean()
    avg_time_google_only = df[df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;] != &lt;span class=&quot;string&quot;&gt;'SKIPPED'&lt;/span&gt;][&lt;span class=&quot;string&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean() &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;builtin&quot;&gt;any&lt;/span&gt;(df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;] != &lt;span class=&quot;string&quot;&gt;'SKIPPED'&lt;/span&gt;) &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt; 0
    
    &lt;span class=&quot;comment&quot;&gt;# 요약 통계&lt;/span&gt;
    success_count = df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;string&quot;&gt;'SUCCESS'&lt;/span&gt;).&lt;span class=&quot;builtin&quot;&gt;sum&lt;/span&gt;()
    failed_count = df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;string&quot;&gt;'FAILED'&lt;/span&gt;).&lt;span class=&quot;builtin&quot;&gt;sum&lt;/span&gt;()
    error_count = df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;string&quot;&gt;'ERROR'&lt;/span&gt;).&lt;span class=&quot;builtin&quot;&gt;sum&lt;/span&gt;() &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;string&quot;&gt;'ERROR'&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;].values &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt; 0
    skipped_count = df[&lt;span class=&quot;string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;string&quot;&gt;'SKIPPED'&lt;/span&gt;).&lt;span class=&quot;builtin&quot;&gt;sum&lt;/span&gt;()
    
    &lt;span class=&quot;comment&quot;&gt;# 총 오류 수 (실패 + 기타 오류)&lt;/span&gt;
    total_errors = failed_count + error_count
    
    &lt;span class=&quot;comment&quot;&gt;# Format time durations for display&lt;/span&gt;
    &lt;span class=&quot;keyword&quot;&gt;def&lt;/span&gt; format_time(seconds):
        &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; seconds &amp;lt; 60:
            &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;string&quot;&gt;f&quot;{seconds:.2f} seconds&quot;&lt;/span&gt;
        &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;builtin&quot;&gt;str&lt;/span&gt;(timedelta(seconds=&lt;span class=&quot;builtin&quot;&gt;round&lt;/span&gt;(seconds)))
    
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;\n====== SUMMARY ======&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Total URLs processed: {total_urls}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Workers used: {workers}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; error_count &amp;gt; 0:
        &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Unexpected errors: {error_count} ({error_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Total errors: {total_errors} ({total_errors/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;\n----- TIMING INFORMATION -----&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Total processing time: {format_time(total_process_time)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Average processing time per URL: {format_time(avg_time_all)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;=====================&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;comment&quot;&gt;# Add summary information to the DataFrame as metadata&lt;/span&gt;
    df.attrs[&lt;span class=&quot;string&quot;&gt;'total_process_time'&lt;/span&gt;] = total_process_time
    df.attrs[&lt;span class=&quot;string&quot;&gt;'avg_processing_time'&lt;/span&gt;] = avg_time_all
    df.attrs[&lt;span class=&quot;string&quot;&gt;'avg_google_url_time'&lt;/span&gt;] = avg_time_google_only
    df.attrs[&lt;span class=&quot;string&quot;&gt;'workers_used'&lt;/span&gt;] = workers
    
    &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; df

&lt;span class=&quot;keyword&quot;&gt;def&lt;/span&gt; process_url(url, interval_time=1):
    &lt;span class=&quot;string&quot;&gt;&quot;&quot;&quot;단일 URL을 처리하고 결과를 반환하는 함수&quot;&quot;&quot;&lt;/span&gt;
    url_start_time = time.time()
    
    &lt;span class=&quot;keyword&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;string&quot;&gt;&quot;news.google.com&quot;&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;in&lt;/span&gt; url:
            &lt;span class=&quot;keyword&quot;&gt;try&lt;/span&gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; decoded_result.&lt;span class=&quot;builtin&quot;&gt;get&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;&quot;status&quot;&lt;/span&gt;):
                    status = &lt;span class=&quot;string&quot;&gt;&quot;SUCCESS&quot;&lt;/span&gt;
                    decoded_url = decoded_result[&lt;span class=&quot;string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;]
                    error_message = &lt;span class=&quot;string&quot;&gt;&quot;&quot;&lt;/span&gt;
                &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
                    status = &lt;span class=&quot;string&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                    decoded_url = &lt;span class=&quot;string&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                    error_message = decoded_result.&lt;span class=&quot;builtin&quot;&gt;get&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;&quot;message&quot;&lt;/span&gt;, &lt;span class=&quot;string&quot;&gt;&quot;Unknown error&quot;&lt;/span&gt;)
            &lt;span class=&quot;keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; e:
                status = &lt;span class=&quot;string&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                decoded_url = &lt;span class=&quot;string&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                error_message = &lt;span class=&quot;builtin&quot;&gt;str&lt;/span&gt;(e)
        &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;comment&quot;&gt;# For non-Google News URLs&lt;/span&gt;
            status = &lt;span class=&quot;string&quot;&gt;&quot;SKIPPED&quot;&lt;/span&gt;
            decoded_url = url  &lt;span class=&quot;comment&quot;&gt;# Keep the same URL&lt;/span&gt;
            error_message = &lt;span class=&quot;string&quot;&gt;&quot;Not a Google News URL&quot;&lt;/span&gt;
        
        &lt;span class=&quot;comment&quot;&gt;# Calculate processing time for this URL&lt;/span&gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &lt;span class=&quot;comment&quot;&gt;# Return result as dictionary&lt;/span&gt;
        &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; {
            &lt;span class=&quot;string&quot;&gt;&quot;original_url&quot;&lt;/span&gt;: url,
            &lt;span class=&quot;string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;: decoded_url,
            &lt;span class=&quot;string&quot;&gt;&quot;status&quot;&lt;/span&gt;: status,
            &lt;span class=&quot;string&quot;&gt;&quot;error_message&quot;&lt;/span&gt;: error_message,
            &lt;span class=&quot;string&quot;&gt;&quot;processing_time_sec&quot;&lt;/span&gt;: &lt;span class=&quot;builtin&quot;&gt;round&lt;/span&gt;(processing_time, 3)
        }
    &lt;span class=&quot;keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;keyword&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;comment&quot;&gt;# Handle any unexpected errors&lt;/span&gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &lt;span class=&quot;keyword&quot;&gt;return&lt;/span&gt; {
            &lt;span class=&quot;string&quot;&gt;&quot;original_url&quot;&lt;/span&gt;: url,
            &lt;span class=&quot;string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;: &lt;span class=&quot;string&quot;&gt;&quot;N/A&quot;&lt;/span&gt;,
            &lt;span class=&quot;string&quot;&gt;&quot;status&quot;&lt;/span&gt;: &lt;span class=&quot;string&quot;&gt;&quot;ERROR&quot;&lt;/span&gt;,
            &lt;span class=&quot;string&quot;&gt;&quot;error_message&quot;&lt;/span&gt;: &lt;span class=&quot;string&quot;&gt;f&quot;Unexpected error: {str(e)}&quot;&lt;/span&gt;,
            &lt;span class=&quot;string&quot;&gt;&quot;processing_time_sec&quot;&lt;/span&gt;: &lt;span class=&quot;builtin&quot;&gt;round&lt;/span&gt;(processing_time, 3)
        }

&lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; __name__ == &lt;span class=&quot;string&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    &lt;span class=&quot;comment&quot;&gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&lt;/span&gt;
    parser = argparse.ArgumentParser(description=&lt;span class=&quot;string&quot;&gt;'Google News URL decoder'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;string&quot;&gt;'input_file_path'&lt;/span&gt;, &lt;span class=&quot;builtin&quot;&gt;help&lt;/span&gt;=&lt;span class=&quot;string&quot;&gt;'Path to the file containing URLs to decode'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;string&quot;&gt;'--output_dir'&lt;/span&gt;, &lt;span class=&quot;string&quot;&gt;'-o'&lt;/span&gt;, &lt;span class=&quot;builtin&quot;&gt;help&lt;/span&gt;=&lt;span class=&quot;string&quot;&gt;'Output directory for results (default: same as input file)'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;string&quot;&gt;'--interval'&lt;/span&gt;, &lt;span class=&quot;string&quot;&gt;'-i'&lt;/span&gt;, &lt;span class=&quot;builtin&quot;&gt;type&lt;/span&gt;=&lt;span class=&quot;builtin&quot;&gt;int&lt;/span&gt;, default=1, &lt;span class=&quot;builtin&quot;&gt;help&lt;/span&gt;=&lt;span class=&quot;string&quot;&gt;'Interval time between requests (default: 1)'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;string&quot;&gt;'--batch_size'&lt;/span&gt;, &lt;span class=&quot;string&quot;&gt;'-b'&lt;/span&gt;, &lt;span class=&quot;builtin&quot;&gt;type&lt;/span&gt;=&lt;span class=&quot;builtin&quot;&gt;int&lt;/span&gt;, default=10, &lt;span class=&quot;builtin&quot;&gt;help&lt;/span&gt;=&lt;span class=&quot;string&quot;&gt;'Batch size for saving interim results (default: 10)'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;string&quot;&gt;'--workers'&lt;/span&gt;, &lt;span class=&quot;string&quot;&gt;'-w'&lt;/span&gt;, &lt;span class=&quot;builtin&quot;&gt;type&lt;/span&gt;=&lt;span class=&quot;builtin&quot;&gt;int&lt;/span&gt;, default=1, &lt;span class=&quot;builtin&quot;&gt;help&lt;/span&gt;=&lt;span class=&quot;string&quot;&gt;'Number of worker threads for parallel processing (default: 1)'&lt;/span&gt;)
    
    args = parser.parse_args()
    
    &lt;span class=&quot;comment&quot;&gt;# 인자 출력&lt;/span&gt;
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Processing URLs from: {args.input_file_path}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Output directory: {args.output_dir or 'Same as input file'}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Interval time: {args.interval}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Batch size: {args.batch_size}&quot;&lt;/span&gt;)
    &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Workers: {args.workers} (Parallel: {'Enabled' if args.workers &amp;gt; 1 else 'Disabled'})&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;comment&quot;&gt;# 함수 실행&lt;/span&gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size,
        args.workers
    )
    
    &lt;span class=&quot;comment&quot;&gt;# 결과 반환&lt;/span&gt;
    &lt;span class=&quot;keyword&quot;&gt;if&lt;/span&gt; result &lt;span class=&quot;keyword&quot;&gt;is&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;not&lt;/span&gt; &lt;span class=&quot;keyword&quot;&gt;None&lt;/span&gt;:
        &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Process completed successfully. Results saved to CSV file.&quot;&lt;/span&gt;)
    &lt;span class=&quot;keyword&quot;&gt;else&lt;/span&gt;:
        &lt;span class=&quot;builtin&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;string&quot;&gt;f&quot;Process failed. Please check error messages above.&quot;&lt;/span&gt;)&lt;/pre&gt;
&lt;div class=&quot;code-act&quot;&gt;&lt;button class=&quot;copy-btn&quot;&gt;  코드 복사&lt;/button&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;script&gt;
    //===============================================
    //  Toggle + copy (right-aligned version) – 2025-05-24
    //
    //  Once the DOM is ready, registers two delegated click
    //  listeners on document.body:
    //    - .toggle-btn : shows or hides the .code-ct and .code-act
    //                    elements inside the closest .code-box
    //    - .copy-btn   : copies the innerText of the .code-ct
    //                    element inside the closest .code-box
    //===============================================
    document.addEventListener('DOMContentLoaded',()=&gt;{
      // Toggle: ignore every click whose target is not a .toggle-btn
      document.body.addEventListener('click',e=&gt;{
        if(!e.target.classList.contains('toggle-btn')) return;
        // NOTE(review): closest() and querySelector() results are used without
        // a null check, so the button is assumed to sit inside a .code-box
        // that contains both .code-ct and .code-act - confirm against the markup.
        const box  = e.target.closest('.code-box');
        const ct   = box.querySelector('.code-ct');
        const act  = box.querySelector('.code-act');
        // Only the inline style is inspected: "open" means display was set to 'block' here.
        const open = ct.style.display==='block';
        ct.style.display  = open ? 'none' : 'block';
        act.style.display = open ? 'none' : 'flex';   // fix: the action row is switched on as flex
        // Button label flips between the "expand" and "collapse" captions.
        e.target.textContent = open ? '  펼치기' : '  접기';
      });
      // Copy: ignore every click whose target is not a .copy-btn
      document.body.addEventListener('click',e=&gt;{
        if(!e.target.classList.contains('copy-btn')) return;
        const btn  = e.target;
        const code = btn.closest('.code-box').querySelector('.code-ct').innerText;
        // Use the async Clipboard API when it exists and the page is a secure
        // context; if that promise rejects, or the API is unavailable, use fallback().
        if(navigator.clipboard &amp;&amp; window.isSecureContext){
          navigator.clipboard.writeText(code).then(()=&gt;flash(btn)).catch(()=&gt;fallback(code,btn));
        }else{
          fallback(code,btn);
        }
      });
      // Replaces the button text with a "copied" confirmation and restores the
      // previous text after 2000 ms.
      function flash(btn){
        const t = btn.textContent;
        btn.textContent='✅ 복사됨!';
        setTimeout(()=&gt;btn.textContent=t,2000);
      }
      // Copy path without the Clipboard API: puts the text into a temporary
      // textarea positioned off-screen, selects it, and runs execCommand('copy').
      function fallback(text,btn){
        const ta=document.createElement('textarea');
        ta.value=text;ta.style.position='fixed';ta.style.top='-1000px';
        document.body.appendChild(ta);ta.select();
        // NOTE(review): the return value of execCommand is not checked, so
        // flash() runs unless the call throws; the alert only fires on an exception.
        try{document.execCommand('copy');flash(btn);}catch(_){alert('복사 실패  ');}
        // The temporary textarea is removed whether or not the copy succeeded.
        document.body.removeChild(ta);
      }
    });
    &lt;/script&gt;
&lt;/div&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/D35nU/btsN4aunDdc/NSbToXB9ih24A9SBE81jpK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/D35nU/btsN4aunDdc/NSbToXB9ih24A9SBE81jpK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/D35nU/btsN4aunDdc/NSbToXB9ih24A9SBE81jpK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FD35nU%2FbtsN4aunDdc%2FNSbToXB9ih24A9SBE81jpK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  성능 평가 및 결과&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;병렬 처리를 적용한 결과, 다음과 같은 성능 향상을 확인하였다.&lt;/p&gt;
&lt;div id=&quot;code_1747920551215&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;style&amp;gt;
        .performance-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .performance-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .performance-table th:first-child {
            width: 25%;
        }
        
        .performance-table th:nth-child(2),
        .performance-table th:nth-child(3) {
            width: 25%;
        }
        
        .performance-table th:nth-child(4) {
            width: 25%;
        }
        
        .performance-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .performance-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .performance-table td {
            border: 1px solid #ddd;
            padding: 14px 18px;
            vertical-align: middle;
            line-height: 1.5;
            text-align: center;
            word-break: keep-all;
        }
        
        .metric-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
        }
        
        .better-value {
            color: #2e7d32;
            font-weight: bold;
        }
        
        .worse-value {
            color: #c62828;
        }
        
        .neutral-value {
            color: #0277bd;
        }
        
        .comparison-result {
            font-style: italic;
            color: #555;
            font-weight: 500;
        }
        
        .improvement {
            color: #2e7d32;
            font-weight: bold;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;table class=&amp;quot;performance-table&amp;quot;&amp;gt;
        &amp;lt;thead&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;th&amp;gt;성능 지표&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;순차 처리&amp;lt;br&amp;gt;(Sequential)&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;병렬 처리&amp;lt;br&amp;gt;(Parallel)&amp;lt;/th&amp;gt;
                &amp;lt;th&amp;gt;비교 결과&amp;lt;/th&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/thead&amp;gt;
        &amp;lt;tbody&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;총 처리 시간&amp;lt;br&amp;gt;&amp;lt;small&amp;gt;(500개 URL)&amp;lt;/small&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;worse-value&amp;quot;&amp;gt;26분 57초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;5분 12초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result improvement&amp;quot;&amp;gt;약 5배 빠름&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;평균 개별&amp;lt;br&amp;gt;처리 시간&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;3초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;3초&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;차이 없음&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;전체 처리&amp;lt;br&amp;gt;효율성&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;worse-value&amp;quot;&amp;gt;낮음&amp;lt;br&amp;gt;(순차 대기)&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;높음&amp;lt;br&amp;gt;(동시 처리)&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result improvement&amp;quot;&amp;gt;5배 향상&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;성공률&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;100%&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;neutral-value&amp;quot;&amp;gt;100%&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;동일&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
            &amp;lt;tr&amp;gt;
                &amp;lt;td class=&amp;quot;metric-name&amp;quot;&amp;gt;사용된&amp;lt;br&amp;gt;작업자 수&amp;lt;/td&amp;gt;
                &amp;lt;td&amp;gt;1개&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;better-value&amp;quot;&amp;gt;5개&amp;lt;/td&amp;gt;
                &amp;lt;td class=&amp;quot;comparison-result&amp;quot;&amp;gt;병렬 처리 활용&amp;lt;/td&amp;gt;
            &amp;lt;/tr&amp;gt;
        &amp;lt;/tbody&amp;gt;
    &amp;lt;/table&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        .performance-table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin: 25px 0;
            font-family: 'Nanum Gothic', sans-serif;
            box-shadow: 0 3px 8px rgba(0,0,0,0.15);
            font-size: 15px;
            border-radius: 5px;
            overflow: hidden;
            table-layout: fixed;
        }
        
        .performance-table th {
            background-color: #414b5c;
            color: white;
            text-align: center;
            padding: 15px 20px;
            font-weight: bold;
            border: 1px solid #ddd;
            letter-spacing: 0.5px;
            vertical-align: middle;
        }
        
        .performance-table th:first-child {
            width: 25%;
        }
        
        .performance-table th:nth-child(2),
        .performance-table th:nth-child(3) {
            width: 25%;
        }
        
        .performance-table th:nth-child(4) {
            width: 25%;
        }
        
        .performance-table tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        
        .performance-table tr:hover {
            background-color: #f1f1f1;
        }
        
        .performance-table td {
            border: 1px solid #ddd;
            padding: 14px 18px;
            vertical-align: middle;
            line-height: 1.5;
            text-align: center;
            word-break: keep-all;
        }
        
        .metric-name {
            font-weight: bold;
            background-color: #f5f5f5;
            text-align: center;
        }
        
        .better-value {
            color: #2e7d32;
            font-weight: bold;
        }
        
        .worse-value {
            color: #c62828;
        }
        
        .neutral-value {
            color: #0277bd;
        }
        
        .comparison-result {
            font-style: italic;
            color: #555;
            font-weight: 500;
        }
        
        .improvement {
            color: #2e7d32;
            font-weight: bold;
        }
    &lt;/style&gt;
&lt;table class=&quot;performance-table&quot;&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;성능 지표&lt;/th&gt;
&lt;th&gt;순차 처리&lt;br /&gt;(Sequential)&lt;/th&gt;
&lt;th&gt;병렬 처리&lt;br /&gt;(Parallel)&lt;/th&gt;
&lt;th&gt;비교 결과&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;총 처리 시간&lt;br /&gt;&lt;small&gt;(500개 URL)&lt;/small&gt;&lt;/td&gt;
&lt;td class=&quot;worse-value&quot;&gt;26분 57초&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;5분 12초&lt;/td&gt;
&lt;td class=&quot;comparison-result improvement&quot;&gt;약 5배 빠름&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;평균 개별&lt;br /&gt;처리 시간&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;3초&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;3초&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;차이 없음&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;전체 처리&lt;br /&gt;효율성&lt;/td&gt;
&lt;td class=&quot;worse-value&quot;&gt;낮음&lt;br /&gt;(순차 대기)&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;높음&lt;br /&gt;(동시 처리)&lt;/td&gt;
&lt;td class=&quot;comparison-result improvement&quot;&gt;5배 향상&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;성공률&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;100%&lt;/td&gt;
&lt;td class=&quot;neutral-value&quot;&gt;100%&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;동일&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;metric-name&quot;&gt;사용된&lt;br /&gt;작업자 수&lt;/td&gt;
&lt;td&gt;1개&lt;/td&gt;
&lt;td class=&quot;better-value&quot;&gt;5개&lt;/td&gt;
&lt;td class=&quot;comparison-result&quot;&gt;병렬 처리 활용&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;마무리&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;googlenewsdecoder는 복잡한 구글 뉴스 URL 리디렉션 문제를 간단히 해결할 수 있는 유용한 도구로 이를 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;병렬 처리와 결합하면 처리 속도를 획기적으로 향상&lt;/b&gt;&lt;/span&gt;시킬 수 있다. &lt;span&gt;이제 원문 기사 주소를 알아내었으니, 기사 내용을 추출하는 단계로 넘어가겠다. &lt;/span&gt;&lt;span&gt;다음 글에서는&lt;/span&gt;&lt;span&gt; &lt;/span&gt;&lt;span&gt;대량 기사 크롤링 전략&lt;/span&gt;&lt;span&gt;을 다룰 예정이다. &lt;br /&gt;&lt;/span&gt;&lt;/p&gt;</description>
      <category>concurrent.futures</category>
      <category>googlenewsdecoder</category>
      <category>기사수집자동화</category>
      <category>병렬처리</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/17</guid>
      <comments>https://catalystmind.tistory.com/17#entry17comment</comments>
      <pubDate>Sat, 24 May 2025 23:35:43 +0900</pubDate>
    </item>
    <item>
      <title>Googlenewsdecoder: 구글 뉴스 RSS 링크를 원문 주소로 변경해주는 도구</title>
      <link>https://catalystmind.tistory.com/16</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747641036119&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
&amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
    &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;googlenewsdecoder&amp;lt;/span&amp;gt;는 Google News RSS 피드의 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;리디렉션 URL&amp;lt;/span&amp;gt;을 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;원래 주소로 복원&amp;lt;/span&amp;gt;해주는 Python 도구임&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;
            URL &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;약 300개&amp;lt;/span&amp;gt; 처리 결과,
            &amp;lt;ul style=&amp;quot;list-style-type: none; padding-left: 20px; margin-top: 8px; margin-bottom: 0;&amp;quot;&amp;gt;
                &amp;lt;li style=&amp;quot;margin-bottom: 5px;&amp;quot;&amp;gt;URL 하나 처리 시간은 평균 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;약 3초&amp;lt;/span&amp;gt;,&amp;lt;/li&amp;gt;
                &amp;lt;li&amp;gt;정확도 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;100%&amp;lt;/span&amp;gt;로 現 시점에는 안정적으로 변환 가능한 것으로 판단함&amp;lt;/li&amp;gt;
            &amp;lt;/ul&amp;gt;
        &amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;&amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;자동화 시스템&amp;lt;/span&amp;gt;과 연동하여 뉴스 수집 및 분석 워크플로우를 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;효율화&amp;lt;/span&amp;gt;할 수 있음&amp;lt;/li&amp;gt;
    &amp;lt;/ul&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;googlenewsdecoder&lt;/span&gt;는 Google News RSS 피드의 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;리디렉션 URL&lt;/span&gt;을 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;원래 주소로 복원&lt;/span&gt;해주는 Python 도구임&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;URL &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;약 300개&lt;/span&gt; 처리 결과,
&lt;ul style=&quot;list-style-type: none; padding-left: 20px; margin-top: 8px; margin-bottom: 0;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 5px;&quot;&gt;URL 하나 처리 시간은 평균 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;약 3초&lt;/span&gt;,&lt;/li&gt;
&lt;li&gt;정확도 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;100%&lt;/span&gt;로 現 시점에는 안정적으로 변환 가능한 것으로 판단함&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;&lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;자동화 시스템&lt;/span&gt;과 연동하여 뉴스 수집 및 분석 워크플로우를 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;효율화&lt;/span&gt;할 수 있음&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;지난 글에서 구글 뉴스 RSS의 복잡한 리디렉션 구조를 설명하며 성공적인 뉴스 본문 추출을 위해서는 원문 URL의 필요성을 강조했다. 이번 글에서는 googlenewsdecoder의 기능과 사용법, 성능(처리 시간 및 정확도)에 대해서 설명하고자 한다.&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;   &amp;nbsp;googlenewsdecoder란?&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;googlenewsdecoder는 이름에서도 알 수 있듯, Google 뉴스 RSS에서 제공되는 리디렉션 URL을 실제 뉴스 원문 URL로 변환해 주는 편리한 파이썬 라이브러리다.&amp;nbsp;&lt;/span&gt;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;⚠️ googlenewsdecoder 사용 시 주의할 점&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span&gt;구글의 URL 형식이 변경되면 언제든 작동하지 않을 수 있다.&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span&gt;Google 서버와의 실시간 통신이 필수이므로 인터넷 연결이 반드시 필요하다.&lt;/span&gt;&lt;span&gt;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;   기본 사용법&lt;/span&gt;&lt;/h3&gt;
&lt;div id=&quot;code_1747650174428&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
  &amp;lt;style&amp;gt;
    /* 코드 블록 스타일을 특정 클래스에만 적용되도록 범위 제한 */
    .tistory-custom-code {
      position: relative;
      background-color: rgba(255, 255, 255, 1);
      border-radius: 8px;
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      font-size: 15px;
      line-height: 133%;
      margin: 15px 0;
      overflow: hidden;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
      border: 1px solid #eaeaea;
    }

    .tistory-custom-code .code-header {
      background-color: #f8f8f8;
      color: #383a42;
      padding: 10px 15px;
      font-weight: 600;
      display: flex;
      justify-content: space-between;
      align-items: center;
      border-bottom: 1px solid #eaeaea;
    }

    .tistory-custom-code .code-content {
      padding: 15px 20px;
      overflow-x: auto;
      max-height: 500px;
      color: #383a42;
    }
    
    /* 구문 강조 스타일 */
    .tistory-custom-code .python-keyword {
      color: #a626a4;
      font-weight: normal;
    }
    
    .tistory-custom-code .python-string {
      color: #50a14f;
    }
    
    .tistory-custom-code .python-comment {
      color: #a0a1a7;
      font-style: italic;
    }
    
    .tistory-custom-code .python-function {
      color: #4078f2;
    }
    
    /* 코드 줄바꿈 방지와 스크롤바를 보이게 함 - 필요한 부분만 */
    .tistory-custom-code .long-line {
      white-space: nowrap;
    }
    
    /* 스크롤바 스타일링 */
    .tistory-custom-code .code-content::-webkit-scrollbar {
      height: 8px;
      background-color: #f5f5f5;
    }
    
    .tistory-custom-code .code-content::-webkit-scrollbar-thumb {
      border-radius: 4px;
      background-color: #d1d5da;
    }
    
    .tistory-custom-code .code-content::-webkit-scrollbar-thumb:hover {
      background-color: #a8adb5;
    }
  &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
  &amp;lt;!-- 티스토리 코드 블록 시작 --&amp;gt;
  &amp;lt;div class=&amp;quot;tistory-custom-code&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;code-header&amp;quot;&amp;gt;
      &amp;lt;span&amp;gt;Python&amp;lt;/span&amp;gt;
    &amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;code-content&amp;quot;&amp;gt;
      &amp;lt;pre&amp;gt;&amp;lt;code&amp;gt;&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; googlenewsdecoder &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; gnewsdecoder

&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;main&amp;lt;/span&amp;gt;():
    interval_time = 1  &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# interval is optional, default is None&amp;lt;/span&amp;gt;
    source_url = &amp;lt;span class=&amp;quot;python-string long-line&amp;quot;&amp;gt;&amp;quot;https://news.google.com/rss/articles/CBMiYkFVX3lxTFBlb0FfcmFmQUZybGF6NXFNNmNhd2NwZjc5NlZ4NkZZUEdqckVERFJZck05OEk5VnhOS1JzTTNnMDlxc1RSdW1xRzhTNFNNTEl1d0RqWDNKWTB6cUxIMGlrM3dR0gFkQVVfeXFMTkcyUEtwTEhtQUNQMGZZYTZnZVFLWTZDZDNhU1ZlUHl2OVVRNGhuRDNQZDNZQXVRS1FHS01peGZJRmlDd3NVX0k1ZmdlM0M0RzRRN1lRMVhfNmtoYnprM0VyMXoyWg?oc=5&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        decoded_url = gnewsdecoder(source_url, interval=interval_time)
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; decoded_url.get(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;):
            &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;Decoded URL:&amp;quot;&amp;lt;/span&amp;gt;, decoded_url[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;])
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;Error:&amp;quot;&amp;lt;/span&amp;gt;, decoded_url[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;message&amp;quot;&amp;lt;/span&amp;gt;])
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Error occurred: {e}&amp;quot;&amp;lt;/span&amp;gt;)

&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    main()&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;!-- 티스토리 코드 블록 끝 --&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
    /* 코드 블록 스타일을 특정 클래스에만 적용되도록 범위 제한 */
    .tistory-custom-code {
      position: relative;
      background-color: rgba(255, 255, 255, 1);
      border-radius: 8px;
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      font-size: 15px;
      line-height: 133%;
      margin: 15px 0;
      overflow: hidden;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
      border: 1px solid #eaeaea;
    }

    .tistory-custom-code .code-header {
      background-color: #f8f8f8;
      color: #383a42;
      padding: 10px 15px;
      font-weight: 600;
      display: flex;
      justify-content: space-between;
      align-items: center;
      border-bottom: 1px solid #eaeaea;
    }

    .tistory-custom-code .code-content {
      padding: 15px 20px;
      overflow-x: auto;
      max-height: 500px;
      color: #383a42;
    }
    
    /* 구문 강조 스타일 */
    .tistory-custom-code .python-keyword {
      color: #a626a4;
      font-weight: normal;
    }
    
    .tistory-custom-code .python-string {
      color: #50a14f;
    }
    
    .tistory-custom-code .python-comment {
      color: #a0a1a7;
      font-style: italic;
    }
    
    .tistory-custom-code .python-function {
      color: #4078f2;
    }
    
    /* 코드 줄바꿈 방지와 스크롤바를 보이게 함 - 필요한 부분만 */
    .tistory-custom-code .long-line {
      white-space: nowrap;
    }
    
    /* 스크롤바 스타일링 */
    .tistory-custom-code .code-content::-webkit-scrollbar {
      height: 8px;
      background-color: #f5f5f5;
    }
    
    .tistory-custom-code .code-content::-webkit-scrollbar-thumb {
      border-radius: 4px;
      background-color: #d1d5da;
    }
    
    .tistory-custom-code .code-content::-webkit-scrollbar-thumb:hover {
      background-color: #a8adb5;
    }
  &lt;/style&gt;
&lt;!-- 티스토리 코드 블록 시작 --&gt;
&lt;div class=&quot;tistory-custom-code&quot;&gt;
&lt;div class=&quot;code-header&quot;&gt;&lt;span&gt;Python&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-content&quot;&gt;
&lt;pre&gt;&lt;code&gt;&lt;span class=&quot;python-keyword&quot;&gt;from&lt;/span&gt; googlenewsdecoder &lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; gnewsdecoder

&lt;span class=&quot;python-keyword&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;main&lt;/span&gt;():
    interval_time = 1  &lt;span class=&quot;python-comment&quot;&gt;# interval is optional, default is None&lt;/span&gt;
    source_url = &lt;span class=&quot;python-string long-line&quot;&gt;&quot;https://news.google.com/rss/articles/CBMiYkFVX3lxTFBlb0FfcmFmQUZybGF6NXFNNmNhd2NwZjc5NlZ4NkZZUEdqckVERFJZck05OEk5VnhOS1JzTTNnMDlxc1RSdW1xRzhTNFNNTEl1d0RqWDNKWTB6cUxIMGlrM3dR0gFkQVVfeXFMTkcyUEtwTEhtQUNQMGZZYTZnZVFLWTZDZDNhU1ZlUHl2OVVRNGhuRDNQZDNZQXVRS1FHS01peGZJRmlDd3NVX0k1ZmdlM0M0RzRRN1lRMVhfNmtoYnprM0VyMXoyWg?oc=5&quot;&lt;/span&gt;
    &lt;span class=&quot;python-keyword&quot;&gt;try&lt;/span&gt;:
        decoded_url = gnewsdecoder(source_url, interval=interval_time)
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; decoded_url.get(&lt;span class=&quot;python-string&quot;&gt;&quot;status&quot;&lt;/span&gt;):
            &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;&quot;Decoded URL:&quot;&lt;/span&gt;, decoded_url[&lt;span class=&quot;python-string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;])
        &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;&quot;Error:&quot;&lt;/span&gt;, decoded_url[&lt;span class=&quot;python-string&quot;&gt;&quot;message&quot;&lt;/span&gt;])
    &lt;span class=&quot;python-keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Error occurred: {e}&quot;&lt;/span&gt;)

&lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; __name__ == &lt;span class=&quot;python-string&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    main()&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 티스토리 코드 블록 끝 --&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt; ️ 출력 예시:&lt;/span&gt;&lt;/p&gt;
&lt;pre id=&quot;code_1747483716517&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;Decoded URL: https://www.moneys.co.kr/article/2025050209420281578&lt;/code&gt;&lt;/pre&gt;
&lt;h3 data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;   파일 기반 다중 URL 처리&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;하루에 많게는 종목당 약 30개의 주식 관련 뉴스가 쏟아진다. 이것들을 Power Automate의 반복문을 이용해 처리하는 것은 매우 비효율적이므로, 파이썬 스크립트를 활용하여 수집한 모든 URL을 한 번에 처리하는 방식이 효율적이다. 기본 코드에 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;1. 파일을 읽고 2. 결과를 CSV로 저장하는 코드와 3. googlenewsdecoder의 성능을 분석하기 위한 코드가 추가&lt;/b&gt;&lt;/span&gt;되었다. &lt;br /&gt;&lt;/span&gt;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  &lt;span&gt; 사용 방법&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Powershell에서 실행 방법:&lt;/p&gt;
&lt;pre id=&quot;code_1747649031536&quot; class=&quot;jboss-cli&quot; data-ke-type=&quot;codeblock&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;python googlenewsdecoder_external.py urls.txt --output_dir ./results --interval 1 --batch_size 10&lt;/code&gt;&lt;/pre&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-spread=&quot;false&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span&gt;input_file_path&lt;/span&gt;&lt;span&gt;: 입력 파일 경로 (필수)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span&gt;--output_dir (-o)&lt;/span&gt;&lt;span&gt;: 결과를 저장할 디렉토리 (선택, 기본값은 입력 파일과 동일)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span&gt;--interval (-i)&lt;/span&gt;&lt;span&gt;: 각 요청 간의 시간 간격 (초 단위, 기본값은 1초)&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span&gt;--batch_size (-b)&lt;/span&gt;&lt;span&gt;: 처리한 결과를 몇 개씩 나누어 저장할지 설정 (기본값은 10)&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  전체 파이썬 스크립트는 다음과 같다.&lt;/h3&gt;
&lt;div id=&quot;code_1747650641004&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
  &amp;lt;style&amp;gt;
    /* 코드 블록 스타일을 특정 클래스에만 적용되도록 범위 제한 */
    .carbon-code-block {
      position: relative;
      background-color: #fafafa;
      border-radius: 8px;
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      font-size: 15px;
      line-height: 1.5;
      margin: 15px 0;
      overflow: hidden;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
      border: 1px solid #e1e4e8;
    }

    .carbon-code-block .code-header {
      background-color: #f6f8fa;
      color: #24292e;
      padding: 10px 15px;
      font-weight: 600;
      display: flex;
      justify-content: space-between;
      align-items: center;
      border-bottom: 1px solid #e1e4e8;
    }

    .carbon-code-block .code-content {
      padding: 15px 20px;
      overflow-x: auto;
      max-height: 600px;
    }
    
    .carbon-code-block pre {
      margin: 0;
      padding: 0;
      white-space: pre;
    }
    
    .carbon-code-block code {
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      display: block;
    }
    
    /* One-light 테마 구문 강조 스타일 */
    .carbon-code-block .python-keyword {
      color: #a626a4; /* magenta - 키워드 */
    }
    
    .carbon-code-block .python-string {
      color: #50a14f; /* green - 문자열 */
    }
    
    .carbon-code-block .python-comment {
      color: #a0a1a7; /* gray - 주석 */
      font-style: italic;
    }
    
    .carbon-code-block .python-function {
      color: #4078f2; /* blue - 함수명 */
    }
    
    .carbon-code-block .python-number {
      color: #986801; /* orange - 숫자 */
    }
    
    .carbon-code-block .python-class {
      color: #c18401; /* yellow - 클래스명 */
    }
    
    /* 스크롤바 스타일링 */
    .carbon-code-block .code-content::-webkit-scrollbar {
      height: 8px;
      width: 8px;
      background-color: #f5f5f5;
    }
    
    .carbon-code-block .code-content::-webkit-scrollbar-thumb {
      border-radius: 4px;
      background-color: #d1d5da;
    }
    
    .carbon-code-block .code-content::-webkit-scrollbar-thumb:hover {
      background-color: #a8adb5;
    }
  &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
  &amp;lt;!-- 티스토리 코드 블록 시작 --&amp;gt;
  &amp;lt;div class=&amp;quot;carbon-code-block&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;code-header&amp;quot;&amp;gt;
      &amp;lt;span&amp;gt;Python&amp;lt;/span&amp;gt;
    &amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;code-content&amp;quot;&amp;gt;
      &amp;lt;pre&amp;gt;&amp;lt;code&amp;gt;&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; googlenewsdecoder &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; gnewsdecoder
&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; pandas &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; pd
&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; os
&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; time
&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; argparse
&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; datetime &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; datetime, timedelta

&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;process_urls_from_file&amp;lt;/span&amp;gt;(input_file_path, output_dir=&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;, interval_time=&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;, batch_size=&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt;):
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Read URLs from file&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 파일 확장자 확인&amp;lt;/span&amp;gt;
        file_ext = os.path.splitext(input_file_path)[&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;].lower()
        
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; file_ext == &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'.csv'&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# CSV 파일인 경우 pandas로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                df = pd.read_csv(input_file_path, encoding=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;)
                
                &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 'link' 컬럼이 있는지 확인&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; df.columns:
                    urls = df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;].dropna().tolist()
                    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in 'link' column&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    available_columns = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;', '&amp;lt;/span&amp;gt;.join(df.columns)
                    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&amp;quot;&amp;lt;/span&amp;gt;)
                    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Error parsing CSV file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;open&amp;lt;/span&amp;gt;(input_file_path, &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'r'&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'utf-8-sig'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; file:
                urls = [line.strip() &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; line &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; file &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; line.strip()]
                &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Found {len(urls)} URLs in text file&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Error reading file: {e}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;

    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 파일 저장 기본 설정&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; output_dir &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        output_dir = os.path.dirname(input_file_path) &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;
    
    timestamp = datetime.now().strftime(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;%Y-%m-%d_%H%M%S&amp;quot;&amp;lt;/span&amp;gt;)
    input_filename = os.path.basename(input_file_path).split(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'.'&amp;lt;/span&amp;gt;)[&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;]
    output_filename = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;{input_filename}_decoded_{timestamp}.csv&amp;quot;&amp;lt;/span&amp;gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Results will be saved to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Process each URL&amp;lt;/span&amp;gt;
    results = []
    total_urls = &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(urls)
    
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Starting to process {total_urls} URLs...&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Record overall process start time&amp;lt;/span&amp;gt;
    process_start_time = time.time()

    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; i, url &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;enumerate&amp;lt;/span&amp;gt;(urls):
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; i % &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt; == &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Processing URL {i+1}/{total_urls}...&amp;quot;&amp;lt;/span&amp;gt;)
        
        &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Record start time for this URL&amp;lt;/span&amp;gt;
        url_start_time = time.time()
        
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;news.google.com&amp;quot;&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; url:
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; decoded_result.get(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;):
                    status = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;SUCCESS&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = decoded_result[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;]
                    error_message = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    status = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                    decoded_url = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                    error_message = decoded_result.get(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;message&amp;quot;&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;Unknown error&amp;quot;&amp;lt;/span&amp;gt;)
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
                status = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;FAILED&amp;quot;&amp;lt;/span&amp;gt;
                decoded_url = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;N/A&amp;quot;&amp;lt;/span&amp;gt;
                error_message = &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(e)
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# For non-Google News URLs&amp;lt;/span&amp;gt;
            status = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;SKIPPED&amp;quot;&amp;lt;/span&amp;gt;
            decoded_url = url  &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Keep the same URL&amp;lt;/span&amp;gt;
            error_message = &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;Not a Google News URL&amp;quot;&amp;lt;/span&amp;gt;
        
        &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Calculate processing time for this URL&amp;lt;/span&amp;gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Add to results&amp;lt;/span&amp;gt;
        results.append({
            &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;original_url&amp;quot;&amp;lt;/span&amp;gt;: url,
            &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;decoded_url&amp;quot;&amp;lt;/span&amp;gt;: decoded_url,
            &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;status&amp;quot;&amp;lt;/span&amp;gt;: status,
            &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;error_message&amp;quot;&amp;lt;/span&amp;gt;: error_message,
            &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;processing_time_sec&amp;quot;&amp;lt;/span&amp;gt;: &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(processing_time, &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;3&amp;lt;/span&amp;gt;)
        })
        
        &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 10개마다 중간 결과 저장&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; (i + &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;) % batch_size == &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; i == total_urls - &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;:
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;)
            &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Saved {len(results)} results to: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)

    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 루프 종료 후 전체 처리 시간 계산&amp;lt;/span&amp;gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Create DataFrame with results&amp;lt;/span&amp;gt;
    df = pd.DataFrame(results)

    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Calculate average processing time&amp;lt;/span&amp;gt;
    avg_time_all = df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean()
    avg_time_google_only = df[df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;][&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'processing_time_sec'&amp;lt;/span&amp;gt;].mean() &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;any&amp;lt;/span&amp;gt;(df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;] != &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;0&amp;lt;/span&amp;gt;
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 요약 통계&amp;lt;/span&amp;gt;
    success_count = df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'SUCCESS'&amp;lt;/span&amp;gt;).sum()
    failed_count = df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'FAILED'&amp;lt;/span&amp;gt;).sum()
    skipped_count = df[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'status'&amp;lt;/span&amp;gt;].eq(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'SKIPPED'&amp;lt;/span&amp;gt;).sum()
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Format time durations for display&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;format_time&amp;lt;/span&amp;gt;(seconds):
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; seconds &amp;lt; &amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;60&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;{seconds:.2f} seconds&amp;quot;&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;str&amp;lt;/span&amp;gt;(timedelta(seconds=&amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;round&amp;lt;/span&amp;gt;(seconds)))
    
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;\n====== SUMMARY ======&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Total URLs processed: {total_urls}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;\n----- TIMING INFORMATION -----&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Total processing time: {format_time(total_process_time)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Average processing time per URL: {format_time(avg_time_all)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;=====================&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# Add summary information to the DataFrame as metadata&amp;lt;/span&amp;gt;
    df.attrs[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'total_process_time'&amp;lt;/span&amp;gt;] = total_process_time
    df.attrs[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'avg_processing_time'&amp;lt;/span&amp;gt;] = avg_time_all
    df.attrs[&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'avg_google_url_time'&amp;lt;/span&amp;gt;] = avg_time_google_only
    
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; df

&amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# run_url_decoder 함수를 제거하였습니다.&amp;lt;/span&amp;gt;

&amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&amp;lt;/span&amp;gt;
    parser = argparse.ArgumentParser(description=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'Google News URL decoder'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'input_file_path'&amp;lt;/span&amp;gt;, help=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'Path to the file containing URLs to decode'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'--output_dir'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'-o'&amp;lt;/span&amp;gt;, help=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'Output directory for results (default: same as input file)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'--interval'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'-i'&amp;lt;/span&amp;gt;, type=&amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;1&amp;lt;/span&amp;gt;, help=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'Interval time between requests (default: 1)'&amp;lt;/span&amp;gt;)
    parser.add_argument(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'--batch_size'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'-b'&amp;lt;/span&amp;gt;, type=&amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;int&amp;lt;/span&amp;gt;, default=&amp;lt;span class=&amp;quot;python-number&amp;quot;&amp;gt;10&amp;lt;/span&amp;gt;, help=&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;'Batch size for saving interim results (default: 10)'&amp;lt;/span&amp;gt;)
    
    args = parser.parse_args()
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 인자 출력&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Processing URLs from: {args.input_file_path}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Output directory: {args.output_dir or 'Same as input file'}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Interval time: {args.interval}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Batch size: {args.batch_size}&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 함수 실행&amp;lt;/span&amp;gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size
    )
    
    &amp;lt;span class=&amp;quot;python-comment&amp;quot;&amp;gt;# 결과 반환&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; result &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;is&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;None&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Process completed successfully. Results saved to CSV file.&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;python-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;python-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;python-string&amp;quot;&amp;gt;f&amp;quot;Process failed. Please check error messages above.&amp;quot;&amp;lt;/span&amp;gt;)&amp;lt;/code&amp;gt;&amp;lt;/pre&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;!-- 티스토리 코드 블록 끝 --&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
    /* 코드 블록 스타일을 특정 클래스에만 적용되도록 범위 제한 */
    .carbon-code-block {
      position: relative;
      background-color: #fafafa;
      border-radius: 8px;
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      font-size: 15px;
      line-height: 1.5;
      margin: 15px 0;
      overflow: hidden;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
      border: 1px solid #e1e4e8;
    }

    .carbon-code-block .code-header {
      background-color: #f6f8fa;
      color: #24292e;
      padding: 10px 15px;
      font-weight: 600;
      display: flex;
      justify-content: space-between;
      align-items: center;
      border-bottom: 1px solid #e1e4e8;
    }

    .carbon-code-block .code-content {
      padding: 15px 20px;
      overflow-x: auto;
      max-height: 600px;
    }
    
    .carbon-code-block pre {
      margin: 0;
      padding: 0;
      white-space: pre;
    }
    
    .carbon-code-block code {
      font-family: 'Hack', 'D2Coding', 'Source Code Pro', monospace;
      display: block;
    }
    
    /* One-light 테마 구문 강조 스타일 */
    .carbon-code-block .python-keyword {
      color: #a626a4; /* magenta - 키워드 */
    }
    
    .carbon-code-block .python-string {
      color: #50a14f; /* green - 문자열 */
    }
    
    .carbon-code-block .python-comment {
      color: #a0a1a7; /* gray - 주석 */
      font-style: italic;
    }
    
    .carbon-code-block .python-function {
      color: #4078f2; /* blue - 함수명 */
    }
    
    .carbon-code-block .python-number {
      color: #986801; /* orange - 숫자 */
    }
    
    .carbon-code-block .python-class {
      color: #c18401; /* yellow - 클래스명 */
    }
    
    /* 스크롤바 스타일링 */
    .carbon-code-block .code-content::-webkit-scrollbar {
      height: 8px;
      width: 8px;
      background-color: #f5f5f5;
    }
    
    .carbon-code-block .code-content::-webkit-scrollbar-thumb {
      border-radius: 4px;
      background-color: #d1d5da;
    }
    
    .carbon-code-block .code-content::-webkit-scrollbar-thumb:hover {
      background-color: #a8adb5;
    }
  &lt;/style&gt;
&lt;!-- 티스토리 코드 블록 시작 --&gt;
&lt;div class=&quot;carbon-code-block&quot;&gt;
&lt;div class=&quot;code-header&quot;&gt;&lt;span&gt;Python&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-content&quot;&gt;
&lt;pre&gt;&lt;code&gt;&lt;span class=&quot;python-keyword&quot;&gt;from&lt;/span&gt; googlenewsdecoder &lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; gnewsdecoder
&lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; pandas &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; pd
&lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; os
&lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; time
&lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; argparse
&lt;span class=&quot;python-keyword&quot;&gt;from&lt;/span&gt; datetime &lt;span class=&quot;python-keyword&quot;&gt;import&lt;/span&gt; datetime, timedelta

&lt;span class=&quot;python-keyword&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;process_urls_from_file&lt;/span&gt;(input_file_path, output_dir=&lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;, interval_time=&lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;, batch_size=&lt;span class=&quot;python-number&quot;&gt;10&lt;/span&gt;):
    &lt;span class=&quot;python-comment&quot;&gt;# Read URLs from file&lt;/span&gt;
    &lt;span class=&quot;python-keyword&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;python-comment&quot;&gt;# 파일 확장자 확인&lt;/span&gt;
        file_ext = os.path.splitext(input_file_path)[&lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;].lower()
        
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; file_ext == &lt;span class=&quot;python-string&quot;&gt;'.csv'&lt;/span&gt;:
            &lt;span class=&quot;python-comment&quot;&gt;# CSV 파일인 경우 pandas로 읽기&lt;/span&gt;
            &lt;span class=&quot;python-keyword&quot;&gt;try&lt;/span&gt;:
                df = pd.read_csv(input_file_path, encoding=&lt;span class=&quot;python-string&quot;&gt;'utf-8-sig'&lt;/span&gt;)
                
                &lt;span class=&quot;python-comment&quot;&gt;# 'link' 컬럼이 있는지 확인&lt;/span&gt;
                &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;python-string&quot;&gt;'link'&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;in&lt;/span&gt; df.columns:
                    urls = df[&lt;span class=&quot;python-string&quot;&gt;'link'&lt;/span&gt;].dropna().tolist()
                    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Found {len(urls)} URLs in 'link' column&quot;&lt;/span&gt;)
                &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
                    available_columns = &lt;span class=&quot;python-string&quot;&gt;', '&lt;/span&gt;.join(df.columns)
                    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Error: 'link' column not found in CSV. Available columns: {available_columns}&quot;&lt;/span&gt;)
                    &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;
            &lt;span class=&quot;python-keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; e:
                &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Error parsing CSV file: {e}&quot;&lt;/span&gt;)
                &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;
        &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;python-comment&quot;&gt;# 일반 텍스트 파일로 간주하고 한 줄에 하나의 URL로 읽기&lt;/span&gt;
            &lt;span class=&quot;python-keyword&quot;&gt;with&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;open&lt;/span&gt;(input_file_path, &lt;span class=&quot;python-string&quot;&gt;'r'&lt;/span&gt;, encoding=&lt;span class=&quot;python-string&quot;&gt;'utf-8-sig'&lt;/span&gt;) &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; file:
                urls = [line.strip() &lt;span class=&quot;python-keyword&quot;&gt;for&lt;/span&gt; line &lt;span class=&quot;python-keyword&quot;&gt;in&lt;/span&gt; file &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; line.strip()]
                &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Found {len(urls)} URLs in text file&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Error reading file: {e}&quot;&lt;/span&gt;)
        &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;

    &lt;span class=&quot;python-comment&quot;&gt;# 파일 저장 기본 설정&lt;/span&gt;
    &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; output_dir &lt;span class=&quot;python-keyword&quot;&gt;is&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;:
        output_dir = os.path.dirname(input_file_path) &lt;span class=&quot;python-keyword&quot;&gt;or&lt;/span&gt; &lt;span class=&quot;python-string&quot;&gt;'.'&lt;/span&gt;
    
    timestamp = datetime.now().strftime(&lt;span class=&quot;python-string&quot;&gt;&quot;%Y-%m-%d_%H%M%S&quot;&lt;/span&gt;)
    input_filename = os.path.basename(input_file_path).split(&lt;span class=&quot;python-string&quot;&gt;'.'&lt;/span&gt;)[&lt;span class=&quot;python-number&quot;&gt;0&lt;/span&gt;]
    output_filename = &lt;span class=&quot;python-string&quot;&gt;f&quot;{input_filename}_decoded_{timestamp}.csv&quot;&lt;/span&gt;
    output_path = os.path.join(output_dir, output_filename)
    
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Results will be saved to: {output_path}&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;python-comment&quot;&gt;# Process each URL&lt;/span&gt;
    results = []
    total_urls = &lt;span class=&quot;python-function&quot;&gt;len&lt;/span&gt;(urls)
    
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Starting to process {total_urls} URLs...&quot;&lt;/span&gt;)

    &lt;span class=&quot;python-comment&quot;&gt;# Record overall process start time&lt;/span&gt;
    process_start_time = time.time()

    &lt;span class=&quot;python-keyword&quot;&gt;for&lt;/span&gt; i, url &lt;span class=&quot;python-keyword&quot;&gt;in&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;enumerate&lt;/span&gt;(urls):
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; i % &lt;span class=&quot;python-number&quot;&gt;10&lt;/span&gt; == &lt;span class=&quot;python-number&quot;&gt;0&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;or&lt;/span&gt; i == total_urls - &lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;:
            &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Processing URL {i+1}/{total_urls}...&quot;&lt;/span&gt;)
        
        &lt;span class=&quot;python-comment&quot;&gt;# Record start time for this URL&lt;/span&gt;
        url_start_time = time.time()
        
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;python-string&quot;&gt;&quot;news.google.com&quot;&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;in&lt;/span&gt; url:
            &lt;span class=&quot;python-keyword&quot;&gt;try&lt;/span&gt;:
                decoded_result = gnewsdecoder(url, interval=interval_time)
                &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; decoded_result.get(&lt;span class=&quot;python-string&quot;&gt;&quot;status&quot;&lt;/span&gt;):
                    status = &lt;span class=&quot;python-string&quot;&gt;&quot;SUCCESS&quot;&lt;/span&gt;
                    decoded_url = decoded_result[&lt;span class=&quot;python-string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;]
                    error_message = &lt;span class=&quot;python-string&quot;&gt;&quot;&quot;&lt;/span&gt;
                &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
                    status = &lt;span class=&quot;python-string&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                    decoded_url = &lt;span class=&quot;python-string&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                    error_message = decoded_result.get(&lt;span class=&quot;python-string&quot;&gt;&quot;message&quot;&lt;/span&gt;, &lt;span class=&quot;python-string&quot;&gt;&quot;Unknown error&quot;&lt;/span&gt;)
            &lt;span class=&quot;python-keyword&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;python-keyword&quot;&gt;as&lt;/span&gt; e:
                status = &lt;span class=&quot;python-string&quot;&gt;&quot;FAILED&quot;&lt;/span&gt;
                decoded_url = &lt;span class=&quot;python-string&quot;&gt;&quot;N/A&quot;&lt;/span&gt;
                error_message = &lt;span class=&quot;python-function&quot;&gt;str&lt;/span&gt;(e)
        &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;python-comment&quot;&gt;# For non-Google News URLs&lt;/span&gt;
            status = &lt;span class=&quot;python-string&quot;&gt;&quot;SKIPPED&quot;&lt;/span&gt;
            decoded_url = url  &lt;span class=&quot;python-comment&quot;&gt;# Keep the same URL&lt;/span&gt;
            error_message = &lt;span class=&quot;python-string&quot;&gt;&quot;Not a Google News URL&quot;&lt;/span&gt;
        
        &lt;span class=&quot;python-comment&quot;&gt;# Calculate processing time for this URL&lt;/span&gt;
        url_end_time = time.time()
        processing_time = url_end_time - url_start_time
        
        &lt;span class=&quot;python-comment&quot;&gt;# Add to results&lt;/span&gt;
        results.append({
            &lt;span class=&quot;python-string&quot;&gt;&quot;original_url&quot;&lt;/span&gt;: url,
            &lt;span class=&quot;python-string&quot;&gt;&quot;decoded_url&quot;&lt;/span&gt;: decoded_url,
            &lt;span class=&quot;python-string&quot;&gt;&quot;status&quot;&lt;/span&gt;: status,
            &lt;span class=&quot;python-string&quot;&gt;&quot;error_message&quot;&lt;/span&gt;: error_message,
            &lt;span class=&quot;python-string&quot;&gt;&quot;processing_time_sec&quot;&lt;/span&gt;: &lt;span class=&quot;python-function&quot;&gt;round&lt;/span&gt;(processing_time, &lt;span class=&quot;python-number&quot;&gt;3&lt;/span&gt;)
        })
        
        &lt;span class=&quot;python-comment&quot;&gt;# batch_size개마다(기본값 10) 중간 결과 저장&lt;/span&gt;
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; (i + &lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;) % batch_size == &lt;span class=&quot;python-number&quot;&gt;0&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;or&lt;/span&gt; i == total_urls - &lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;:
            current_df = pd.DataFrame(results)
            current_df.to_csv(output_path, index=&lt;span class=&quot;python-keyword&quot;&gt;False&lt;/span&gt;, encoding=&lt;span class=&quot;python-string&quot;&gt;'utf-8'&lt;/span&gt;)
            &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Saved {len(results)} results to: {output_path}&quot;&lt;/span&gt;)

    &lt;span class=&quot;python-comment&quot;&gt;# 루프 종료 후 전체 처리 시간 계산&lt;/span&gt;
    process_end_time = time.time()
    total_process_time = process_end_time - process_start_time
    
    &lt;span class=&quot;python-comment&quot;&gt;# Create DataFrame with results&lt;/span&gt;
    df = pd.DataFrame(results)

    &lt;span class=&quot;python-comment&quot;&gt;# Calculate average processing time&lt;/span&gt;
    avg_time_all = df[&lt;span class=&quot;python-string&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean()
    avg_time_google_only = df[df[&lt;span class=&quot;python-string&quot;&gt;'status'&lt;/span&gt;] != &lt;span class=&quot;python-string&quot;&gt;'SKIPPED'&lt;/span&gt;][&lt;span class=&quot;python-string&quot;&gt;'processing_time_sec'&lt;/span&gt;].mean() &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;any&lt;/span&gt;(df[&lt;span class=&quot;python-string&quot;&gt;'status'&lt;/span&gt;] != &lt;span class=&quot;python-string&quot;&gt;'SKIPPED'&lt;/span&gt;) &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;python-number&quot;&gt;0&lt;/span&gt;
    
    &lt;span class=&quot;python-comment&quot;&gt;# 요약 통계&lt;/span&gt;
    success_count = df[&lt;span class=&quot;python-string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;python-string&quot;&gt;'SUCCESS'&lt;/span&gt;).sum()
    failed_count = df[&lt;span class=&quot;python-string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;python-string&quot;&gt;'FAILED'&lt;/span&gt;).sum()
    skipped_count = df[&lt;span class=&quot;python-string&quot;&gt;'status'&lt;/span&gt;].eq(&lt;span class=&quot;python-string&quot;&gt;'SKIPPED'&lt;/span&gt;).sum()
    
    &lt;span class=&quot;python-comment&quot;&gt;# Format time durations for display&lt;/span&gt;
    &lt;span class=&quot;python-keyword&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;format_time&lt;/span&gt;(seconds):
        &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; seconds &amp;lt; &lt;span class=&quot;python-number&quot;&gt;60&lt;/span&gt;:
            &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;python-string&quot;&gt;f&quot;{seconds:.2f} seconds&quot;&lt;/span&gt;
        &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
            &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;python-function&quot;&gt;str&lt;/span&gt;(timedelta(seconds=&lt;span class=&quot;python-function&quot;&gt;round&lt;/span&gt;(seconds)))
    
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;\n====== SUMMARY ======&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Total URLs processed: {total_urls}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Successfully decoded: {success_count} ({success_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Failed to decode: {failed_count} ({failed_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Skipped (non-Google News): {skipped_count} ({skipped_count/total_urls*100:.1f}%)&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;\n----- TIMING INFORMATION -----&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Total processing time: {format_time(total_process_time)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Average processing time per URL: {format_time(avg_time_all)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Average processing time per Google News URL: {format_time(avg_time_google_only)}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Fastest URL processing time: {format_time(df['processing_time_sec'].min())}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Slowest URL processing time: {format_time(df['processing_time_sec'].max())}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;=====================&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;python-comment&quot;&gt;# Add summary information to the DataFrame as metadata&lt;/span&gt;
    df.attrs[&lt;span class=&quot;python-string&quot;&gt;'total_process_time'&lt;/span&gt;] = total_process_time
    df.attrs[&lt;span class=&quot;python-string&quot;&gt;'avg_processing_time'&lt;/span&gt;] = avg_time_all
    df.attrs[&lt;span class=&quot;python-string&quot;&gt;'avg_google_url_time'&lt;/span&gt;] = avg_time_google_only
    
    &lt;span class=&quot;python-keyword&quot;&gt;return&lt;/span&gt; df

&lt;span class=&quot;python-comment&quot;&gt;# run_url_decoder 함수를 제거하였습니다.&lt;/span&gt;

&lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; __name__ == &lt;span class=&quot;python-string&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    &lt;span class=&quot;python-comment&quot;&gt;# 명령줄에서 인자를 받아 실행하기 위한 코드&lt;/span&gt;
    parser = argparse.ArgumentParser(description=&lt;span class=&quot;python-string&quot;&gt;'Google News URL decoder'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;python-string&quot;&gt;'input_file_path'&lt;/span&gt;, help=&lt;span class=&quot;python-string&quot;&gt;'Path to the file containing URLs to decode'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;python-string&quot;&gt;'--output_dir'&lt;/span&gt;, &lt;span class=&quot;python-string&quot;&gt;'-o'&lt;/span&gt;, help=&lt;span class=&quot;python-string&quot;&gt;'Output directory for results (default: same as input file)'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;python-string&quot;&gt;'--interval'&lt;/span&gt;, &lt;span class=&quot;python-string&quot;&gt;'-i'&lt;/span&gt;, type=&lt;span class=&quot;python-function&quot;&gt;int&lt;/span&gt;, default=&lt;span class=&quot;python-number&quot;&gt;1&lt;/span&gt;, help=&lt;span class=&quot;python-string&quot;&gt;'Interval time between requests (default: 1)'&lt;/span&gt;)
    parser.add_argument(&lt;span class=&quot;python-string&quot;&gt;'--batch_size'&lt;/span&gt;, &lt;span class=&quot;python-string&quot;&gt;'-b'&lt;/span&gt;, type=&lt;span class=&quot;python-function&quot;&gt;int&lt;/span&gt;, default=&lt;span class=&quot;python-number&quot;&gt;10&lt;/span&gt;, help=&lt;span class=&quot;python-string&quot;&gt;'Batch size for saving interim results (default: 10)'&lt;/span&gt;)
    
    args = parser.parse_args()
    
    &lt;span class=&quot;python-comment&quot;&gt;# 인자 출력&lt;/span&gt;
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Processing URLs from: {args.input_file_path}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Output directory: {args.output_dir or 'Same as input file'}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Interval time: {args.interval}&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Batch size: {args.batch_size}&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;python-comment&quot;&gt;# 함수 실행&lt;/span&gt;
    result = process_urls_from_file(
        args.input_file_path, 
        args.output_dir, 
        args.interval, 
        args.batch_size
    )
    
    &lt;span class=&quot;python-comment&quot;&gt;# 결과 반환&lt;/span&gt;
    &lt;span class=&quot;python-keyword&quot;&gt;if&lt;/span&gt; result &lt;span class=&quot;python-keyword&quot;&gt;is&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;not&lt;/span&gt; &lt;span class=&quot;python-keyword&quot;&gt;None&lt;/span&gt;:
        &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Process completed successfully. Results saved to CSV file.&quot;&lt;/span&gt;)
    &lt;span class=&quot;python-keyword&quot;&gt;else&lt;/span&gt;:
        &lt;span class=&quot;python-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;python-string&quot;&gt;f&quot;Process failed. Please check error messages above.&quot;&lt;/span&gt;)&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 티스토리 코드 블록 끝 --&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt; ️&lt;/span&gt; 실행결과&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/GBc4d/btsN5PP7eqn/T1VKNJs5wUhLcraGBNkeBk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/GBc4d/btsN5PP7eqn/T1VKNJs5wUhLcraGBNkeBk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/GBc4d/btsN5PP7eqn/T1VKNJs5wUhLcraGBNkeBk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FGBc4d%2FbtsN5PP7eqn%2FT1VKNJs5wUhLcraGBNkeBk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-pm-slice=&quot;1 1 []&quot; data-ke-size=&quot;size23&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;  성능 평가 및 결과&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #000000;&quot;&gt;성능 평가 결과는 다음과 같다:&lt;/span&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-spread=&quot;false&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;&lt;span style=&quot;color: #000000;&quot;&gt;총 처리 시간: &lt;span style=&quot;color: #006dd7;&quot;&gt;26분 57초&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;평균 처리 시간&lt;/b&gt;: &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;3.23초&lt;/b&gt;&lt;/span&gt;이며, 대부분&lt;span&gt; &lt;/span&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;3.0-3.5초&lt;/b&gt;&lt;/span&gt;&lt;span&gt;&amp;nbsp;&lt;/span&gt;구간에 집중되어 있음.&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;color: #000000;&quot;&gt;&lt;b&gt;정확도&lt;/b&gt;: 총&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;500&lt;/b&gt;개&lt;/span&gt;의 URL이 분석되었으며, 성공률은&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;100.00%&lt;/b&gt;&lt;/span&gt;임.&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;color: #000000;&quot;&gt;msn과 같은 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;까다로운 url도 성공&lt;/span&gt;&lt;/b&gt;함&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;895&quot; data-origin-height=&quot;602&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bE6sAo/btsN4Mfvl1S/WY2OPnrYXq34DgWN7Gvnik/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bE6sAo/btsN4Mfvl1S/WY2OPnrYXq34DgWN7Gvnik/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bE6sAo/btsN4Mfvl1S/WY2OPnrYXq34DgWN7Gvnik/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbE6sAo%2FbtsN4Mfvl1S%2FWY2OPnrYXq34DgWN7Gvnik%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;895&quot; height=&quot;602&quot; data-origin-width=&quot;895&quot; data-origin-height=&quot;602&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;마무리&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span&gt;googlenewsdecoder는 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;구글 뉴스 RSS 링크를 원문 URL로 변환하는 도구&lt;/span&gt;&lt;/b&gt;로 기사 본문 추출에 중요한 역할을 한다. 까다로운 URL 리디렉션을 간편하게 해결하여 기사 수집 자동화를 위한 원문 접근성을 높여준다. 이번 글에서는 반복문을 사용하여 순차적으로 변환하였으나, 병렬 처리를 적용하면 속도를 더 높일 수 있다. 다음 글에서는 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;concurrent.futures 패키지&lt;/span&gt;&lt;/b&gt;를 이용한 병렬 처리로 시간을 더 단축하는 방법을 알아보겠다.&lt;/span&gt;&lt;/p&gt;</description>
      <category>googlenewsdecoder</category>
      <category>기사수집자동화</category>
      <category>웹스크래핑</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/16</guid>
      <comments>https://catalystmind.tistory.com/16#entry16comment</comments>
      <pubDate>Sat, 24 May 2025 22:32:31 +0900</pubDate>
    </item>
    <item>
      <title>Google 뉴스 RSS 링크 분석: 본문 추출을 위한 URL 처리 전략 2가지</title>
      <link>https://catalystmind.tistory.com/15</link>
      <description>&lt;div id=&quot;code_1747532791424&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
&amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
    &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;구글 뉴스 RSS에서 제공하는 링크는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;실제 뉴스 페이지가 아닌 리디렉션 URL&amp;lt;/span&amp;gt;임&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;이 링크는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;URL-safe base64와 protobuf&amp;lt;/span&amp;gt; 형식으로 인코딩되어 있어 직접 처리가 어려움&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;본문 추출을 위한 두 가지 접근법: &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;①오픈소스 패키지 활용&amp;lt;/span&amp;gt;과 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;②브라우저 자동화(Playwright)&amp;lt;/span&amp;gt;&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;두 방법 모두 장단점이 있으며, 어떤 것을 사용할지는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;각자의 상황에 맞게 선택&amp;lt;/span&amp;gt;해야 함&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;앞으로의 글에서 각 방법의 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;구체적인 구현 방법&amp;lt;/span&amp;gt;과 성과를 다룰 예정&amp;lt;/li&amp;gt;
    &amp;lt;/ul&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;구글 뉴스 RSS에서 제공하는 링크는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;실제 뉴스 페이지가 아닌 리디렉션 URL&lt;/span&gt;임&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;이 링크는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;URL-safe base64와 protobuf&lt;/span&gt; 형식으로 인코딩되어 있어 직접 처리가 어려움&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;본문 추출을 위한 두 가지 접근법: &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;①오픈소스 패키지 활용&lt;/span&gt;과 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;②브라우저 자동화(Playwright)&lt;/span&gt;&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;두 방법 모두 장단점이 있으며, 어떤 것을 사용할지는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;각자의 상황에 맞게 선택&lt;/span&gt;해야 함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;앞으로의 글에서 각 방법의 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;구체적인 구현 방법&lt;/span&gt;과 성과를 다룰 예정&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;  구글 뉴스 RSS 링크: 리디렉션 URL과 본문 추출의 시작&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;RSS로 수집한 기사 주소를 보면 알겠지만, 사람이 읽고 이해할 수 있는 형태가 아니다. Google이 제공하는 URL을 클릭하면 실제 뉴스 사이트로 이동하지만, 이 주소는 원문 사이트 주소가 아니다. trafilatura는 웹에서 바로 정보를 가져와서 본문을 추출하는 기능이 있지만, 이 URL을 직접 입력하면 작동하지 않는다. 이 주소를 원문 사이트 주소로 바꿔주어야 정상 작동하는 것을 알 수 있다.&lt;/p&gt;
&lt;div id=&quot;code_1747563887514&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
/* 티스토리 스타일 코드 블록 */
.tistory-code {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #f5f5f5;
  overflow: hidden;
}

/* 코드 헤더 */
.code-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.language-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 코드 내용 컨테이너 */
.code-content {
  position: relative;
  overflow-x: auto;
  background-color: #f8f8f8;
}

/* 실제 코드 블록 */
.code-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문법 하이라이팅 - 티스토리 스타일 */
.keyword {
  color: #0066cc;
  font-weight: bold;
}

.string {
  color: #d14;
}

.comment {
  color: #998;
  font-style: italic;
}

.variable {
  color: #333;
}

.function {
  color: #900;
}

/* 스크롤바 커스터마이징 */
.code-content::-webkit-scrollbar {
  height: 8px;
}

.code-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.code-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.code-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}

/* 복사 버튼 (옵션) */
.copy-button {
  position: absolute;
  top: 8px;
  right: 10px;
  background: #e8e8e8;
  border: 1px solid #ddd;
  border-radius: 3px;
  padding: 2px 8px;
  font-size: 12px;
  color: #666;
  cursor: pointer;
  display: none; /* 기본적으로 숨김 */
}

.tistory-code:hover .copy-button {
  display: block; /* 마우스 오버시 표시 */
}
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;tistory-code&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-header&amp;quot;&amp;gt;
    &amp;lt;span&amp;gt;Python&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;language-label&amp;quot;&amp;gt;구글 URL입력&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-content&amp;quot;&amp;gt;
    &amp;lt;pre class=&amp;quot;code-block&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;trafilatura&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;url&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;https://news.google.com/rss/articles/CBMicEFVX3lxTFBpYk5ZakJVbWRmaW5MLW1Da1pNUEc2dG5uTHdsZXFWSDFFMXVRVF9qajVyYllpNjFmRWR6TnpwQktka0hpTUdURVlLdXdLc3FSQ0hpTlkwSm5uSjlZRnNxZzRyekE4dE1MSENDRno3SWvSAXRBVV95cUxNdXBTR0NLMHVaTEZHb3pGME1BdGNwNjVBNTJzTDd6U1RHbzJCbDlhbmpYMXZ1Tk00M3N5YkxXWTF1Q1FqUXl5X2Z4TC0ycnBxZmVEclEzazRmcmtMY0szaVN5c3dWRWwtemZhNjJNVGZFWEJGVQ?oc=5&amp;quot;&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;html&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;function&amp;quot;&amp;gt;trafilatura.fetch_url&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;url&amp;lt;/span&amp;gt;)
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;text&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;function&amp;quot;&amp;gt;trafilatura.extract&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;html&amp;lt;/span&amp;gt;)
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;text&amp;lt;/span&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
/* 티스토리 스타일 코드 블록 */
.tistory-code {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #f5f5f5;
  overflow: hidden;
}

/* 코드 헤더 */
.code-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.language-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 코드 내용 컨테이너 */
.code-content {
  position: relative;
  overflow-x: auto;
  background-color: #f8f8f8;
}

/* 실제 코드 블록 */
.code-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문법 하이라이팅 - 티스토리 스타일 */
.keyword {
  color: #0066cc;
  font-weight: bold;
}

.string {
  color: #d14;
}

.comment {
  color: #998;
  font-style: italic;
}

.variable {
  color: #333;
}

.function {
  color: #900;
}

/* 스크롤바 커스터마이징 */
.code-content::-webkit-scrollbar {
  height: 8px;
}

.code-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.code-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.code-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}

/* 복사 버튼 (옵션) */
.copy-button {
  position: absolute;
  top: 8px;
  right: 10px;
  background: #e8e8e8;
  border: 1px solid #ddd;
  border-radius: 3px;
  padding: 2px 8px;
  font-size: 12px;
  color: #666;
  cursor: pointer;
  display: none; /* 기본적으로 숨김 */
}

.tistory-code:hover .copy-button {
  display: block; /* 마우스 오버시 표시 */
}
&lt;/style&gt;
&lt;div class=&quot;tistory-code&quot;&gt;
&lt;div class=&quot;code-header&quot;&gt;&lt;span&gt;Python&lt;/span&gt; &lt;span class=&quot;language-label&quot;&gt;구글 URL입력&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-content&quot;&gt;
&lt;pre class=&quot;code-block&quot;&gt;&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; &lt;span class=&quot;variable&quot;&gt;trafilatura&lt;/span&gt;
&lt;span class=&quot;variable&quot;&gt;url&lt;/span&gt; = &lt;span class=&quot;string&quot;&gt;&quot;https://news.google.com/rss/articles/CBMicEFVX3lxTFBpYk5ZakJVbWRmaW5MLW1Da1pNUEc2dG5uTHdsZXFWSDFFMXVRVF9qajVyYllpNjFmRWR6TnpwQktka0hpTUdURVlLdXdLc3FSQ0hpTlkwSm5uSjlZRnNxZzRyekE4dE1MSENDRno3SWvSAXRBVV95cUxNdXBTR0NLMHVaTEZHb3pGME1BdGNwNjVBNTJzTDd6U1RHbzJCbDlhbmpYMXZ1Tk00M3N5YkxXWTF1Q1FqUXl5X2Z4TC0ycnBxZmVEclEzazRmcmtMY0szaVN5c3dWRWwtemZhNjJNVGZFWEJGVQ?oc=5&quot;&lt;/span&gt;
&lt;span class=&quot;variable&quot;&gt;html&lt;/span&gt; = &lt;span class=&quot;function&quot;&gt;trafilatura.fetch_url&lt;/span&gt;(&lt;span class=&quot;variable&quot;&gt;url&lt;/span&gt;)
&lt;span class=&quot;variable&quot;&gt;text&lt;/span&gt; = &lt;span class=&quot;function&quot;&gt;trafilatura.extract&lt;/span&gt;(&lt;span class=&quot;variable&quot;&gt;html&lt;/span&gt;)
&lt;span class=&quot;variable&quot;&gt;text&lt;/span&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div id=&quot;code_1747563994812&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
/* 티스토리 스타일 코드 블록 */
.tistory-code {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #f5f5f5;
  overflow: hidden;
}

/* 코드 헤더 */
.code-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.language-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 코드 내용 컨테이너 */
.code-content {
  position: relative;
  overflow-x: auto;
  background-color: #f8f8f8;
}

/* 실제 코드 블록 */
.code-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문법 하이라이팅 - 티스토리 스타일 */
.keyword {
  color: #0066cc;
  font-weight: bold;
}

.string {
  color: #d14;
}

.comment {
  color: #998;
  font-style: italic;
}

.variable {
  color: #333;
}

.function {
  color: #900;
}

/* 스크롤바 커스터마이징 */
.code-content::-webkit-scrollbar {
  height: 8px;
}

.code-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.code-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.code-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}

/* 복사 버튼 (옵션) */
.copy-button {
  position: absolute;
  top: 8px;
  right: 10px;
  background: #e8e8e8;
  border: 1px solid #ddd;
  border-radius: 3px;
  padding: 2px 8px;
  font-size: 12px;
  color: #666;
  cursor: pointer;
  display: none; /* 기본적으로 숨김 */
}

.tistory-code:hover .copy-button {
  display: block; /* 마우스 오버시 표시 */
}
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;tistory-code&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;code-header&amp;quot;&amp;gt;
    &amp;lt;span&amp;gt;Python&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;language-label&amp;quot;&amp;gt;원본 URL 입력&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;code-content&amp;quot;&amp;gt;
    &amp;lt;pre class=&amp;quot;code-block&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;trafilatura&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;url&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;string&amp;quot;&amp;gt;&amp;quot;https://www.steeldaily.co.kr/news/articleView.html?idxno=191742&amp;quot;&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;html&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;function&amp;quot;&amp;gt;trafilatura.fetch_url&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;url&amp;lt;/span&amp;gt;)
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;text&amp;lt;/span&amp;gt; = &amp;lt;span class=&amp;quot;function&amp;quot;&amp;gt;trafilatura.extract&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;html&amp;lt;/span&amp;gt;)
&amp;lt;span class=&amp;quot;variable&amp;quot;&amp;gt;text&amp;lt;/span&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
/* 티스토리 스타일 코드 블록 */
.tistory-code {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #f5f5f5;
  overflow: hidden;
}

/* 코드 헤더 */
.code-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.language-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 코드 내용 컨테이너 */
.code-content {
  position: relative;
  overflow-x: auto;
  background-color: #f8f8f8;
}

/* 실제 코드 블록 */
.code-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문법 하이라이팅 - 티스토리 스타일 */
.keyword {
  color: #0066cc;
  font-weight: bold;
}

.string {
  color: #d14;
}

.comment {
  color: #998;
  font-style: italic;
}

.variable {
  color: #333;
}

.function {
  color: #900;
}

/* 스크롤바 커스터마이징 */
.code-content::-webkit-scrollbar {
  height: 8px;
}

.code-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.code-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.code-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}

/* 복사 버튼 (옵션) */
.copy-button {
  position: absolute;
  top: 8px;
  right: 10px;
  background: #e8e8e8;
  border: 1px solid #ddd;
  border-radius: 3px;
  padding: 2px 8px;
  font-size: 12px;
  color: #666;
  cursor: pointer;
  display: none; /* 기본적으로 숨김 */
}

.tistory-code:hover .copy-button {
  display: block; /* 마우스 오버시 표시 */
}
&lt;/style&gt;
&lt;div class=&quot;tistory-code&quot;&gt;
&lt;div class=&quot;code-header&quot;&gt;&lt;span&gt;Python&lt;/span&gt; &lt;span class=&quot;language-label&quot;&gt;원본 URL 입력&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;code-content&quot;&gt;
&lt;pre class=&quot;code-block&quot;&gt;&lt;span class=&quot;keyword&quot;&gt;import&lt;/span&gt; &lt;span class=&quot;variable&quot;&gt;trafilatura&lt;/span&gt;
&lt;span class=&quot;variable&quot;&gt;url&lt;/span&gt; = &lt;span class=&quot;string&quot;&gt;&quot;https://www.steeldaily.co.kr/news/articleView.html?idxno=191742&quot;&lt;/span&gt;
&lt;span class=&quot;variable&quot;&gt;html&lt;/span&gt; = &lt;span class=&quot;function&quot;&gt;trafilatura.fetch_url&lt;/span&gt;(&lt;span class=&quot;variable&quot;&gt;url&lt;/span&gt;)
&lt;span class=&quot;variable&quot;&gt;text&lt;/span&gt; = &lt;span class=&quot;function&quot;&gt;trafilatura.extract&lt;/span&gt;(&lt;span class=&quot;variable&quot;&gt;html&lt;/span&gt;)
&lt;span class=&quot;variable&quot;&gt;text&lt;/span&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div id=&quot;code_1747564215044&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
/* 티스토리 스타일 결과 출력 블록 */
.tistory-output {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #fff;
  overflow: hidden;
}

/* 결과 헤더 */
.output-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.output-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 결과 내용 컨테이너 */
.output-content {
  position: relative;
  overflow-x: auto;
  background-color: #fff;
}

/* 실제 결과 블록 */
.result-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문자열 결과 스타일 */
.result-string {
  color: #067D17;
}

/* 스크롤바 커스터마이징 */
.output-content::-webkit-scrollbar {
  height: 8px;
}

.output-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.output-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.output-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;tistory-output&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;output-header&amp;quot;&amp;gt;
    &amp;lt;span&amp;gt;Output&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;output-label&amp;quot;&amp;gt;실행 결과&amp;lt;/span&amp;gt;
  &amp;lt;/div&amp;gt;
  &amp;lt;div class=&amp;quot;output-content&amp;quot;&amp;gt;
    &amp;lt;pre class=&amp;quot;result-block&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;result-string&amp;quot;&amp;gt;'대구경 강관 전문 제조 기업 동양철관의 1분기 매출 및 영업이익은 전년 동기 대비 동반 축소된 것으로 나타났다.
동양철관은 15일 금융감독원에 분기보고서를 제출하고 1분기 별도 기준 경영 실적에 대해 △매출액 329억 5,100만 원(전년 동기 대비 44.9% 감소) △영업손실 4억 2,900만 원(적자전환) △순손실 15억 6,000만 원(적자전환)을 기록했다고 밝혔다.
이로써 1분기 연결 실적은 매출액 436억 5,800만 원(전년 동기 대비 36% 감소), 영업손실 2,900만 원(적자전환), 순손실 12억 600만 원(적자전환)으로 집계됐다.
이명화 선임기자
lmh@steelnsteel.co.kr'&amp;lt;/span&amp;gt;&amp;lt;/pre&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
/* 티스토리 스타일 결과 출력 블록 */
.tistory-output {
  position: relative;
  margin: 15px 0;
  font-family: 'D2Coding', 'Consolas', 'Courier New', monospace;
  border: 1px solid #e8e8e8;
  border-radius: 4px;
  background-color: #fff;
  overflow: hidden;
}

/* 결과 헤더 */
.output-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  background-color: #f1f1f1;
  border-bottom: 1px solid #e8e8e8;
  padding: 8px 15px;
  font-size: 13px;
  color: #666;
  font-weight: bold;
}

.output-label {
  color: #999;
  font-size: 12px;
  font-weight: normal;
}

/* 결과 내용 컨테이너 */
.output-content {
  position: relative;
  overflow-x: auto;
  background-color: #fff;
}

/* 실제 결과 블록 */
.result-block {
  margin: 0;
  padding: 15px;
  font-size: 13px;
  line-height: 1.6;
  color: #333;
  white-space: pre;
  overflow-x: visible;
  width: max-content;
  min-width: 100%;
}

/* 문자열 결과 스타일 */
.result-string {
  color: #067D17;
}

/* 스크롤바 커스터마이징 */
.output-content::-webkit-scrollbar {
  height: 8px;
}

.output-content::-webkit-scrollbar-track {
  background: #f1f1f1;
}

.output-content::-webkit-scrollbar-thumb {
  background: #ccc;
  border-radius: 4px;
}

.output-content::-webkit-scrollbar-thumb:hover {
  background: #aaa;
}
&lt;/style&gt;
&lt;div class=&quot;tistory-output&quot;&gt;
&lt;div class=&quot;output-header&quot;&gt;&lt;span&gt;Output&lt;/span&gt; &lt;span class=&quot;output-label&quot;&gt;실행 결과&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;output-content&quot;&gt;
&lt;pre class=&quot;result-block&quot;&gt;&lt;span class=&quot;result-string&quot;&gt;'대구경 강관 전문 제조 기업 동양철관의 1분기 매출 및 영업이익은 전년 동기 대비 동반 축소된 것으로 나타났다.
동양철관은 15일 금융감독원에 분기보고서를 제출하고 1분기 별도 기준 경영 실적에 대해 △매출액 329억 5,100만 원(전년 동기 대비 44.9% 감소) △영업손실 4억 2,900만 원(적자전환) △순손실 15억 6,000만 원(적자전환)을 기록했다고 밝혔다.
이로써 1분기 연결 실적은 매출액 436억 5,800만 원(전년 동기 대비 36% 감소), 영업손실 2,900만 원(적자전환), 순손실 12억 600만 원(적자전환)으로 집계됐다.
이명화 선임기자
lmh@steelnsteel.co.kr'&lt;/span&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;❌ 왜 Google 링크를 사용하면 본문 추출이 작동하지 않을까?&lt;/h3&gt;
&lt;div id=&quot;code_1747546348781&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
  .quadrant-container {
    display: grid;
    grid-template-columns: 1fr 1fr;
    grid-template-rows: auto auto;
    gap: 15px;
    width: 100%;
    max-width: 800px;
    margin: 0 auto;
    font-family: 'Noto Sans KR', sans-serif;
  }
  
  .quadrant {
    background-color: #F8F9FA;
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    position: relative;
  }
  
  .icon {
    font-size: 36px;
    margin-bottom: 15px;
  }
  
  .title {
    font-size: 18px;
    font-weight: bold;
    margin-bottom: 15px;
  }
  
  .description {
    font-size: 14px;
    line-height: 1.6;
    color: #333;
  }
  
  .highlight {
    color: #4285F4;
    font-weight: bold;
  }
  
  .number-badge {
    background-color: #4285F4;
    color: white;
    width: 24px;
    height: 24px;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    font-weight: bold;
    position: absolute;
    top: 10px;
    left: 10px;
  }
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;quadrant-container&amp;quot;&amp;gt;
  &amp;lt;!-- 1사분면 --&amp;gt;
  &amp;lt;div class=&amp;quot;quadrant&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;number-badge&amp;quot;&amp;gt;1&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;icon&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;title&amp;quot;&amp;gt;보안 및 트래픽 추적&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;description&amp;quot;&amp;gt;
      사용자의 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;클릭 기록&amp;lt;/span&amp;gt; 및 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;인기 기사&amp;lt;/span&amp;gt; 분석을 위해 
      리디렉션 URL과 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;추적 매개변수&amp;lt;/span&amp;gt; 활용
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  
  &amp;lt;!-- 2사분면 --&amp;gt;
  &amp;lt;div class=&amp;quot;quadrant&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;number-badge&amp;quot;&amp;gt;2&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;icon&amp;quot;&amp;gt;⚙️&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;title&amp;quot;&amp;gt;복잡한 데이터 인코딩&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;description&amp;quot;&amp;gt;
      원문 URL을 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Protobuf&amp;lt;/span&amp;gt; 형식으로 직렬화하고
      &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;URL-safe Base64&amp;lt;/span&amp;gt;로 인코딩
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  
  &amp;lt;!-- 3사분면 --&amp;gt;
  &amp;lt;div class=&amp;quot;quadrant&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;number-badge&amp;quot;&amp;gt;3&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;icon&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;title&amp;quot;&amp;gt;타임스탬프와 서명 인증&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;description&amp;quot;&amp;gt;
      리디렉션 요청의 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;유효성&amp;lt;/span&amp;gt;을 검증하는
      &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;signature&amp;lt;/span&amp;gt; 및 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;timestamp&amp;lt;/span&amp;gt; 포함
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  
  &amp;lt;!-- 4사분면 --&amp;gt;
  &amp;lt;div class=&amp;quot;quadrant&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;number-badge&amp;quot;&amp;gt;4&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;icon&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;title&amp;quot;&amp;gt;진화하는 보안 메커니즘&amp;lt;/div&amp;gt;
    &amp;lt;div class=&amp;quot;description&amp;quot;&amp;gt;
      &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;스크립트&amp;lt;/span&amp;gt;나 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;봇 차단&amp;lt;/span&amp;gt;을 위해 URL 구조와
      데이터 인코딩 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;지속적 변화&amp;lt;/span&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
  .quadrant-container {
    display: grid;
    grid-template-columns: 1fr 1fr;
    grid-template-rows: auto auto;
    gap: 15px;
    width: 100%;
    max-width: 800px;
    margin: 0 auto;
    font-family: 'Noto Sans KR', sans-serif;
  }
  
  .quadrant {
    background-color: #F8F9FA;
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    position: relative;
  }
  
  .icon {
    font-size: 36px;
    margin-bottom: 15px;
  }
  
  .title {
    font-size: 18px;
    font-weight: bold;
    margin-bottom: 15px;
  }
  
  .description {
    font-size: 14px;
    line-height: 1.6;
    color: #333;
  }
  
  .highlight {
    color: #4285F4;
    font-weight: bold;
  }
  
  .number-badge {
    background-color: #4285F4;
    color: white;
    width: 24px;
    height: 24px;
    border-radius: 50%;
    display: flex;
    align-items: center;
    justify-content: center;
    font-weight: bold;
    position: absolute;
    top: 10px;
    left: 10px;
  }
&lt;/style&gt;
&lt;div class=&quot;quadrant-container&quot;&gt;&lt;!-- 1사분면 --&gt;
&lt;div class=&quot;quadrant&quot;&gt;
&lt;div class=&quot;number-badge&quot;&gt;1&lt;/div&gt;
&lt;div class=&quot;icon&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;title&quot;&gt;보안 및 트래픽 추적&lt;/div&gt;
&lt;div class=&quot;description&quot;&gt;사용자의 &lt;span class=&quot;highlight&quot;&gt;클릭 기록&lt;/span&gt; 및 &lt;span class=&quot;highlight&quot;&gt;인기 기사&lt;/span&gt; 분석을 위해 리디렉션 URL과 &lt;span class=&quot;highlight&quot;&gt;추적 매개변수&lt;/span&gt; 활용&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 2사분면 --&gt;
&lt;div class=&quot;quadrant&quot;&gt;
&lt;div class=&quot;number-badge&quot;&gt;2&lt;/div&gt;
&lt;div class=&quot;icon&quot;&gt;⚙️&lt;/div&gt;
&lt;div class=&quot;title&quot;&gt;복잡한 데이터 인코딩&lt;/div&gt;
&lt;div class=&quot;description&quot;&gt;원문 URL을 &lt;span class=&quot;highlight&quot;&gt;Protobuf&lt;/span&gt; 형식으로 직렬화하고 &lt;span class=&quot;highlight&quot;&gt;URL-safe Base64&lt;/span&gt;로 인코딩&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 3사분면 --&gt;
&lt;div class=&quot;quadrant&quot;&gt;
&lt;div class=&quot;number-badge&quot;&gt;3&lt;/div&gt;
&lt;div class=&quot;icon&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;title&quot;&gt;타임스탬프와 서명 인증&lt;/div&gt;
&lt;div class=&quot;description&quot;&gt;리디렉션 요청의 &lt;span class=&quot;highlight&quot;&gt;유효성&lt;/span&gt;을 검증하는 &lt;span class=&quot;highlight&quot;&gt;signature&lt;/span&gt; 및 &lt;span class=&quot;highlight&quot;&gt;timestamp&lt;/span&gt; 포함&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 4사분면 --&gt;
&lt;div class=&quot;quadrant&quot;&gt;
&lt;div class=&quot;number-badge&quot;&gt;4&lt;/div&gt;
&lt;div class=&quot;icon&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;title&quot;&gt;진화하는 보안 메커니즘&lt;/div&gt;
&lt;div class=&quot;description&quot;&gt;&lt;span class=&quot;highlight&quot;&gt;스크립트&lt;/span&gt;나 &lt;span class=&quot;highlight&quot;&gt;봇 차단&lt;/span&gt;을 위해 URL 구조와 데이터 인코딩 &lt;span class=&quot;highlight&quot;&gt;지속적 변화&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747546586522&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
  .toggle-content {
    font-family: 'Noto Sans KR', sans-serif;
    margin: 20px 0;
  }
  
  .toggle-header {
    background-color: #f0f0f0;
    border: 1px solid #ddd;
    border-radius: 5px;
    padding: 10px 15px;
    cursor: pointer;
    font-weight: bold;
    display: flex;
    justify-content: space-between;
    align-items: center;
  }
  
  .toggle-header:hover {
    background-color: #e7e7e7;
  }
  
  .toggle-arrow {
    display: inline-block;
    transition: transform 0.3s;
  }
  
  .toggle-input {
    display: none;
  }
  
  .toggle-detail {
    display: none;
    padding: 15px;
    border: 1px solid #ddd;
    border-top: none;
    border-radius: 0 0 5px 5px;
    line-height: 1.6;
  }
  
  .toggle-input:checked + .toggle-header .toggle-arrow {
    transform: rotate(180deg);
  }
  
  .toggle-input:checked ~ .toggle-detail {
    display: block;
  }
  
  .highlight {
    color: #4285F4;
    font-weight: bold;
  }
  
  .section-title {
    font-weight: bold;
    margin: 15px 0 5px 0;
  }
  
  p {
    margin: 5px 0 15px 0;
  }
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;toggle-content&amp;quot;&amp;gt;
  &amp;lt;input type=&amp;quot;checkbox&amp;quot; id=&amp;quot;toggle1&amp;quot; class=&amp;quot;toggle-input&amp;quot;&amp;gt;
  &amp;lt;label for=&amp;quot;toggle1&amp;quot; class=&amp;quot;toggle-header&amp;quot;&amp;gt;
    &amp;lt;span&amp;gt;Google 뉴스 리디렉션 URL 상세 내용&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;toggle-arrow&amp;quot;&amp;gt;▼&amp;lt;/span&amp;gt;
  &amp;lt;/label&amp;gt;
  &amp;lt;div class=&amp;quot;toggle-detail&amp;quot;&amp;gt;
    &amp;lt;p&amp;gt;Google이 제공하는 이 URL은 실제 뉴스 기사 페이지가 아니라, &amp;lt;strong&amp;gt;Google 서버를 거쳐 리디렉션되는 중간 링크&amp;lt;/strong&amp;gt;이다. 이 구조는 다음과 같은 이유로 설계되어 있다:&amp;lt;/p&amp;gt;
    
    &amp;lt;div class=&amp;quot;section-title&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;1. 보안 및 트래픽 추적:&amp;lt;/strong&amp;gt;&amp;lt;/div&amp;gt;
    &amp;lt;p&amp;gt;Google은 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;사용자의 클릭을 추적&amp;lt;/span&amp;gt;하여 어떤 뉴스가 인기 있는지, 클릭률이 어떻게 형성되는지를 분석한다. 이를 위해 리디렉션 URL과 추가 추적 매개변수(예: url, ref, v, ptok, ei, ctid 등)를 사용하여 사용자의 이동 경로를 기록한다.&amp;lt;/p&amp;gt;
    
    &amp;lt;div class=&amp;quot;section-title&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;2. 원문 보호 및 복잡한 데이터 인코딩:&amp;lt;/strong&amp;gt;&amp;lt;/div&amp;gt;
    &amp;lt;p&amp;gt;이 중간 주소에는 원문 URL을 포함한 다양한 정보가 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Google의 프로토콜 버퍼(Protobuf) 형식으로 직렬화된 후, URL-safe Base64로 인코딩&amp;lt;/span&amp;gt;되어 URL 매개변수에 포함될 가능성이 높다. 이처럼 직렬화와 Base64 인코딩이 겹쳐 있어, 직렬화 형식(스키마)을 알지 못하면 Base64 디코딩만으로는 원문 링크를 포함한 의미 있는 정보를 직접 파악하기가 매우 어렵다.&amp;lt;/p&amp;gt;
    
    &amp;lt;div class=&amp;quot;section-title&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;3. 타임스탬프와 서명 기반 인증 시스템:&amp;lt;/strong&amp;gt;&amp;lt;/div&amp;gt;
    &amp;lt;p&amp;gt;최근 Google의 리디렉션 주소는 더욱 복잡해졌으며, URL에는 리디렉션 요청의 유효성을 Google 서버 측에서 검증하기 위해 사용될 가능성이 높은 &amp;quot;signature&amp;quot; (sig 또는 유사한 이름), &amp;quot;timestamp&amp;quot; (ts 또는 유사한 이름), &amp;quot;gn_art_id&amp;quot; (id 또는 유사한 이름)와 같은 매개변수가 포함될 수 있다. 이러한 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;서명 및 검증 정보&amp;lt;/span&amp;gt;는 클라이언트가 원문 URL을 직접 디코딩하는 데 필요한 정보라기보다는, &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Google 서버가 리디렉션 요청을 처리할 때 유효성을 확인&amp;lt;/span&amp;gt;하는 데 사용된다. 따라서 정상적인 리디렉션 처리를 위해서는 이러한 매개변수가 포함된 요청이 Google 서버로 전송되어야 한다.&amp;lt;/p&amp;gt;
    
    &amp;lt;div class=&amp;quot;section-title&amp;quot;&amp;gt;&amp;lt;strong&amp;gt;4. 진화하는 보안 메커니즘:&amp;lt;/strong&amp;gt;&amp;lt;/div&amp;gt;
    &amp;lt;p&amp;gt;초기에는 간단한 Base64 디코딩만으로 원문 URL의 일부를 추출할 수 있었을 수도 있지만, &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Google은 자동화된 스크립트나 봇의 대량 정보 추출을 방지&amp;lt;/span&amp;gt;하기 위해 지속적으로 URL 구조, 데이터 인코딩 방식, 추적 메커니즘 등을 변경하고 복잡하게 만들고 있다. 최근에는 대량의 리디렉션 요청을 감지하고 차단하는 비율 제한(rate limiting) 메커니즘이 적용될 수 있으며, 이를 우회하려는 시도에는 프록시 서버 사용 등의 더욱 복잡한 기술적 접근이 필요해지고 있다.&amp;lt;/p&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
  .toggle-content {
    font-family: 'Noto Sans KR', sans-serif;
    margin: 20px 0;
  }
  
  .toggle-header {
    background-color: #f0f0f0;
    border: 1px solid #ddd;
    border-radius: 5px;
    padding: 10px 15px;
    cursor: pointer;
    font-weight: bold;
    display: flex;
    justify-content: space-between;
    align-items: center;
  }
  
  .toggle-header:hover {
    background-color: #e7e7e7;
  }
  
  .toggle-arrow {
    display: inline-block;
    transition: transform 0.3s;
  }
  
  .toggle-input {
    display: none;
  }
  
  .toggle-detail {
    display: none;
    padding: 15px;
    border: 1px solid #ddd;
    border-top: none;
    border-radius: 0 0 5px 5px;
    line-height: 1.6;
  }
  
  .toggle-input:checked + .toggle-header .toggle-arrow {
    transform: rotate(180deg);
  }
  
  .toggle-input:checked ~ .toggle-detail {
    display: block;
  }
  
  .highlight {
    color: #4285F4;
    font-weight: bold;
  }
  
  .section-title {
    font-weight: bold;
    margin: 15px 0 5px 0;
  }
  
  p {
    margin: 5px 0 15px 0;
  }
&lt;/style&gt;
&lt;div class=&quot;toggle-content&quot;&gt;&lt;input id=&quot;toggle1&quot; class=&quot;toggle-input&quot; type=&quot;checkbox&quot; /&gt; &lt;label class=&quot;toggle-header&quot; for=&quot;toggle1&quot;&gt; &lt;span&gt;Google 뉴스 리디렉션 URL 상세 내용&lt;/span&gt; &lt;span class=&quot;toggle-arrow&quot;&gt;▼&lt;/span&gt; &lt;/label&gt;
&lt;div class=&quot;toggle-detail&quot;&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Google이 제공하는 이 URL은 실제 뉴스 기사 페이지가 아니라, &lt;b&gt;Google 서버를 거쳐 리디렉션되는 중간 링크&lt;/b&gt;이다. 이 구조는 다음과 같은 이유로 설계되어 있다:&lt;/p&gt;
&lt;div class=&quot;section-title&quot;&gt;&lt;b&gt;1. 보안 및 트래픽 추적:&lt;/b&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Google은 &lt;span class=&quot;highlight&quot;&gt;사용자의 클릭을 추적&lt;/span&gt;하여 어떤 뉴스가 인기 있는지, 클릭률이 어떻게 형성되는지를 분석한다. 이를 위해 리디렉션 URL과 추가 추적 매개변수(예: url, ref, v, ptok, ei, ctid 등)를 사용하여 사용자의 이동 경로를 기록한다.&lt;/p&gt;
&lt;div class=&quot;section-title&quot;&gt;&lt;b&gt;2. 원문 보호 및 복잡한 데이터 인코딩:&lt;/b&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 중간 주소에는 원문 URL을 포함한 다양한 정보가 &lt;span class=&quot;highlight&quot;&gt;Google의 프로토콜 버퍼(Protobuf) 형식으로 직렬화된 후, URL-safe Base64로 인코딩&lt;/span&gt;되어 URL 매개변수에 포함될 가능성이 높다. 이처럼 직렬화와 Base64 인코딩이 겹쳐 있어, 직렬화 형식(스키마)을 알지 못하면 Base64 디코딩만으로는 원문 링크를 포함한 의미 있는 정보를 직접 파악하기가 매우 어렵다.&lt;/p&gt;
&lt;div class=&quot;section-title&quot;&gt;&lt;b&gt;3. 타임스탬프와 서명 기반 인증 시스템:&lt;/b&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;최근 Google의 리디렉션 주소는 더욱 복잡해졌으며, URL에는 리디렉션 요청의 유효성을 Google 서버 측에서 검증하기 위해 사용될 가능성이 높은 &quot;signature&quot; (sig 또는 유사한 이름), &quot;timestamp&quot; (ts 또는 유사한 이름), &quot;gn_art_id&quot; (id 또는 유사한 이름)와 같은 매개변수가 포함될 수 있다. 이러한 &lt;span class=&quot;highlight&quot;&gt;서명 및 검증 정보&lt;/span&gt;는 클라이언트가 원문 URL을 직접 디코딩하는 데 필요한 정보라기보다는, &lt;span class=&quot;highlight&quot;&gt;Google 서버가 리디렉션 요청을 처리할 때 유효성을 확인&lt;/span&gt;하는 데 사용된다. 따라서 정상적인 리디렉션 처리를 위해서는 이러한 매개변수가 포함된 요청이 Google 서버로 전송되어야 한다.&lt;/p&gt;
&lt;div class=&quot;section-title&quot;&gt;&lt;b&gt;4. 진화하는 보안 메커니즘:&lt;/b&gt;&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;초기에는 간단한 Base64 디코딩만으로 원문 URL의 일부를 추출할 수 있었을 수도 있지만, &lt;span class=&quot;highlight&quot;&gt;Google은 자동화된 스크립트나 봇의 대량 정보 추출을 방지&lt;/span&gt;하기 위해 지속적으로 URL 구조, 데이터 인코딩 방식, 추적 메커니즘 등을 변경하고 복잡하게 만들고 있다. 최근에는 대량의 리디렉션 요청을 감지하고 차단하는 비율 제한(rate limiting) 메커니즘이 적용될 수 있으며, 이를 우회하려는 시도에는 프록시 서버 사용 등의 더욱 복잡한 기술적 접근이 필요해지고 있다.&lt;/p&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;blockquote data-ke-style=&quot;style2&quot;&gt;  요약하자면, Google은 원문 주소가 외부에 직접 노출되지 않도록 리디렉션 구조를 설계했고, 중간 주소를 역으로 복호화하거나 디코딩하는 방식이 점점 더 복잡해지고 있다.&amp;nbsp;&lt;/blockquote&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;✅ 본문 추출을 위한 두 가지 접근법&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Google News RSS 링크에서 실제 뉴스 본문을 추출하기 위한 두 가지 주요 전략이 존재한다. 이 글에서는 두 방법을 간략히 소개하고, 이어지는 글에서 각 방법을 자세히 다룰 예정이다.&lt;/p&gt;
&lt;div id=&quot;code_1747555377854&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
  .comparison-container {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    position: relative;
  }
  
  .title {
    text-align: center;
    font-size: 24px;
    font-weight: bold;
    margin-bottom: 30px;
  }
  
  .methods-container {
    display: flex;
    justify-content: space-between;
  }
  
  .method {
    width: 48%;
  }
  
  .method-title {
    text-align: center;
    font-size: 18px;
    font-weight: bold;
    margin-bottom: 20px;
    color: #333;
  }
  
  .process-flow {
    display: flex;
    flex-direction: column;
    align-items: center;
  }
  
  .process-box {
    width: 100%;
    padding: 15px 10px;
    margin-bottom: 15px;
    border-radius: 10px;
    border: 2px solid #4285F4;
    display: flex;
    flex-direction: column;
    align-items: center;
    background-color: white;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
  }
  
  .highlighted-box {
    background-color: #E8F0FE;
  }
  
  .emoji {
    font-size: 24px;
    margin-bottom: 5px;
  }
  
  .step-label {
    text-align: center;
    font-size: 14px;
  }
  
  .arrow {
    height: 20px;
    margin-bottom: 15px;
    text-align: center;
    font-size: 24px;
    color: #4285F4;
    line-height: 1;
  }
  
  .method-description {
    text-align: center;
    margin-top: 20px;
    font-weight: bold;
    color: #4285F4;
  }
  
  .separator {
    border-left: 2px dashed #CCCCCC;
    position: absolute;
    left: 50%;
    top: 80px;
    bottom: 20px;
    transform: translateX(-50%);
  }
  
  /* 반응형 디자인 */
  @media (max-width: 768px) {
    .methods-container {
      flex-direction: column;
    }
    
    .method {
      width: 100%;
      margin-bottom: 30px;
    }
    
    .separator {
      display: none;
    }
  }
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;comparison-container&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;title&amp;quot;&amp;gt;본문 추출을 위한 두 가지 접근법&amp;lt;/div&amp;gt;
  
  &amp;lt;div class=&amp;quot;separator&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
  
  &amp;lt;div class=&amp;quot;methods-container&amp;quot;&amp;gt;
    &amp;lt;!-- 왼쪽: 패키지 활용 방식 --&amp;gt;
    &amp;lt;div class=&amp;quot;method&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;method-title&amp;quot;&amp;gt;패키지 활용 방식&amp;lt;/div&amp;gt;
      
      &amp;lt;div class=&amp;quot;process-flow&amp;quot;&amp;gt;
        &amp;lt;!-- 단계 1 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;복잡한 URL&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 2 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box highlighted-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;오픈소스 패키지&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 3 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt;✅&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;원본 URL 추출 완료&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 4 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;최종 뉴스 페이지 접속&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 5 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;본문 추출&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;div class=&amp;quot;method-description&amp;quot;&amp;gt;직접 URL 해독&amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
    
    &amp;lt;!-- 오른쪽: 브라우저 자동화 방식 --&amp;gt;
    &amp;lt;div class=&amp;quot;method&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;method-title&amp;quot;&amp;gt;브라우저 자동화 방식&amp;lt;/div&amp;gt;
      
      &amp;lt;div class=&amp;quot;process-flow&amp;quot;&amp;gt;
        &amp;lt;!-- 단계 1 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;복잡한 URL&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 2 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box highlighted-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;자동화 브라우저 접속&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 3 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;리디렉션 진행&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 4 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;최종 뉴스 페이지 도착&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 5 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;HTML 저장&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
        
        &amp;lt;div class=&amp;quot;arrow&amp;quot;&amp;gt;&amp;darr;&amp;lt;/div&amp;gt;
        
        &amp;lt;!-- 단계 6 --&amp;gt;
        &amp;lt;div class=&amp;quot;process-box&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;emoji&amp;quot;&amp;gt; &amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-label&amp;quot;&amp;gt;본문 추출&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;div class=&amp;quot;method-description&amp;quot;&amp;gt;Google 처리 과정 모방&amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
  .comparison-container {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    position: relative;
  }
  
  .title {
    text-align: center;
    font-size: 24px;
    font-weight: bold;
    margin-bottom: 30px;
  }
  
  .methods-container {
    display: flex;
    justify-content: space-between;
  }
  
  .method {
    width: 48%;
  }
  
  .method-title {
    text-align: center;
    font-size: 18px;
    font-weight: bold;
    margin-bottom: 20px;
    color: #333;
  }
  
  .process-flow {
    display: flex;
    flex-direction: column;
    align-items: center;
  }
  
  .process-box {
    width: 100%;
    padding: 15px 10px;
    margin-bottom: 15px;
    border-radius: 10px;
    border: 2px solid #4285F4;
    display: flex;
    flex-direction: column;
    align-items: center;
    background-color: white;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
  }
  
  .highlighted-box {
    background-color: #E8F0FE;
  }
  
  .emoji {
    font-size: 24px;
    margin-bottom: 5px;
  }
  
  .step-label {
    text-align: center;
    font-size: 14px;
  }
  
  .arrow {
    height: 20px;
    margin-bottom: 15px;
    text-align: center;
    font-size: 24px;
    color: #4285F4;
    line-height: 1;
  }
  
  .method-description {
    text-align: center;
    margin-top: 20px;
    font-weight: bold;
    color: #4285F4;
  }
  
  .separator {
    border-left: 2px dashed #CCCCCC;
    position: absolute;
    left: 50%;
    top: 80px;
    bottom: 20px;
    transform: translateX(-50%);
  }
  
  /* 반응형 디자인 */
  @media (max-width: 768px) {
    .methods-container {
      flex-direction: column;
    }
    
    .method {
      width: 100%;
      margin-bottom: 30px;
    }
    
    .separator {
      display: none;
    }
  }
&lt;/style&gt;
&lt;div class=&quot;comparison-container&quot;&gt;
&lt;div class=&quot;title&quot;&gt;본문 추출을 위한 두 가지 접근법&lt;/div&gt;
&lt;div class=&quot;separator&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;methods-container&quot;&gt;&lt;!-- 왼쪽: 패키지 활용 방식 --&gt;
&lt;div class=&quot;method&quot;&gt;
&lt;div class=&quot;method-title&quot;&gt;패키지 활용 방식&lt;/div&gt;
&lt;div class=&quot;process-flow&quot;&gt;&lt;!-- 단계 1 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;복잡한 URL&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 2 --&gt;
&lt;div class=&quot;process-box highlighted-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;오픈소스 패키지&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 3 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt;✅&lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;원본 URL 추출 완료&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 4 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;최종 뉴스 페이지 접속&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 5 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;본문 추출&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;method-description&quot;&gt;직접 URL 해독&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 오른쪽: 브라우저 자동화 방식 --&gt;
&lt;div class=&quot;method&quot;&gt;
&lt;div class=&quot;method-title&quot;&gt;브라우저 자동화 방식&lt;/div&gt;
&lt;div class=&quot;process-flow&quot;&gt;&lt;!-- 단계 1 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;복잡한 URL&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 2 --&gt;
&lt;div class=&quot;process-box highlighted-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;자동화 브라우저 접속&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 3 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;리디렉션 진행&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 4 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;최종 뉴스 페이지 도착&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 5 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;HTML 저장&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;arrow&quot;&gt;&amp;darr;&lt;/div&gt;
&lt;!-- 단계 6 --&gt;
&lt;div class=&quot;process-box&quot;&gt;
&lt;div class=&quot;emoji&quot;&gt; &lt;/div&gt;
&lt;div class=&quot;step-label&quot;&gt;본문 추출&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;method-description&quot;&gt;Google 처리 과정 모방&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt;  앞으로 다룰 내용&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이번 글에서는 Google News RSS 링크의 특징과 본문 추출을 위한 두 가지 기본 접근법을 소개했다. 앞으로의 글에서는 다음의 주제들을 다룰 예정이다:&lt;/p&gt;
&lt;div id=&quot;code_1747556253494&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html&amp;gt;
&amp;lt;head&amp;gt;
&amp;lt;style&amp;gt;
  .roadmap-container {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
  }
  
  .timeline {
    position: relative;
    max-width: 100%;
    margin: 0 auto;
    padding-left: 30px;
  }
  
  .timeline::before {
    content: '';
    position: absolute;
    width: 4px;
    background-color: #4285F4;
    top: 0;
    bottom: 0;
    left: 32px;
    border-radius: 4px;
  }
  
  .timeline-item {
    padding: 10px 0 30px 40px;
    position: relative;
    width: calc(100% - 30px);
    box-sizing: border-box;
  }
  
  .timeline-content {
    padding: 20px;
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1);
    position: relative;
    border-left: 5px solid #4285F4;
  }
  
  .timeline-marker {
    position: absolute;
    width: 20px;
    height: 20px;
    background-color: white;
    border: 4px solid #4285F4;
    border-radius: 50%;
    left: -6px;
    top: 25px;
    z-index: 2;
  }
  
  .timeline-title {
    font-weight: bold;
    font-size: 18px;
    color: #4285F4;
    margin-bottom: 15px;
  }
  
  .timeline-points {
    padding-left: 20px;
    margin: 10px 0;
  }
  
  .timeline-points li {
    margin-bottom: 8px;
    position: relative;
    font-size: 14px;
    line-height: 1.5;
  }
  
  .timeline-points li::before {
    content: '&amp;bull;';
    position: absolute;
    left: -15px;
    color: #4285F4;
    font-weight: bold;
  }
  
  .number-emoji {
    margin-right: 8px;
  }
  
  /* 반응형 디자인을 위한 미디어 쿼리 */
  @media screen and (max-width: 600px) {
    .timeline-item {
      padding-left: 30px;
    }
    
    .timeline-content {
      padding: 15px;
    }
  }
&amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;

&amp;lt;div class=&amp;quot;roadmap-container&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;timeline&amp;quot;&amp;gt;
    &amp;lt;!-- 첫 번째 아티클 --&amp;gt;
    &amp;lt;div class=&amp;quot;timeline-item&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-marker&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;timeline-title&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;number-emoji&amp;quot;&amp;gt;1️⃣&amp;lt;/span&amp;gt;Google News URL 간단히 처리하기: 오픈소스 패키지 활용&amp;lt;/div&amp;gt;
        
        &amp;lt;ul class=&amp;quot;timeline-points&amp;quot;&amp;gt;
          &amp;lt;li&amp;gt;복잡한 디코딩 과정 없이 원문 URL 해결하기&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;병렬 처리로 대량 URL 처리 최적화 방법&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;본문 추출 도구 적용 방법&amp;lt;/li&amp;gt;
        &amp;lt;/ul&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
    
    &amp;lt;!-- 두 번째 아티클 --&amp;gt;
    &amp;lt;div class=&amp;quot;timeline-item&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-marker&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;timeline-title&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;number-emoji&amp;quot;&amp;gt;2️⃣&amp;lt;/span&amp;gt;Playwright를 활용한 Google News 본문 추출 자동화&amp;lt;/div&amp;gt;
        
        &amp;lt;ul class=&amp;quot;timeline-points&amp;quot;&amp;gt;
          &amp;lt;li&amp;gt;Playwright vs Selenium 비교 및 Playwright 선택 이유&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;효율적인 브라우저 자동화 설정&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;Playwright의 비동기 처리를 통한 성능 향상&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;본문 추출 도구 적용 방법&amp;lt;/li&amp;gt;
        &amp;lt;/ul&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
    
    &amp;lt;!-- 세 번째 아티클 --&amp;gt;
    &amp;lt;div class=&amp;quot;timeline-item&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-marker&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
      &amp;lt;div class=&amp;quot;timeline-content&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;timeline-title&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;number-emoji&amp;quot;&amp;gt;3️⃣&amp;lt;/span&amp;gt;두 방법의 실전 비교: 어떤 상황에 어떤 방법이 적합한가&amp;lt;/div&amp;gt;
        
        &amp;lt;ul class=&amp;quot;timeline-points&amp;quot;&amp;gt;
          &amp;lt;li&amp;gt;속도, 정확도, 리소스 사용량 비교&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;대규모 배치 처리와 실시간 처리의 차이&amp;lt;/li&amp;gt;
          &amp;lt;li&amp;gt;통계적 유의미성을 위한 최소 표본 크기(300-500개 기사)&amp;lt;/li&amp;gt;
        &amp;lt;/ul&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;

&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
  .roadmap-container {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
  }
  
  .timeline {
    position: relative;
    max-width: 100%;
    margin: 0 auto;
    padding-left: 30px;
  }
  
  .timeline::before {
    content: '';
    position: absolute;
    width: 4px;
    background-color: #4285F4;
    top: 0;
    bottom: 0;
    left: 32px;
    border-radius: 4px;
  }
  
  .timeline-item {
    padding: 10px 0 30px 40px;
    position: relative;
    width: calc(100% - 30px);
    box-sizing: border-box;
  }
  
  .timeline-content {
    padding: 20px;
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1);
    position: relative;
    border-left: 5px solid #4285F4;
  }
  
  .timeline-marker {
    position: absolute;
    width: 20px;
    height: 20px;
    background-color: white;
    border: 4px solid #4285F4;
    border-radius: 50%;
    left: -6px;
    top: 25px;
    z-index: 2;
  }
  
  .timeline-title {
    font-weight: bold;
    font-size: 18px;
    color: #4285F4;
    margin-bottom: 15px;
  }
  
  .timeline-points {
    padding-left: 20px;
    margin: 10px 0;
  }
  
  .timeline-points li {
    margin-bottom: 8px;
    position: relative;
    font-size: 14px;
    line-height: 1.5;
  }
  
  .timeline-points li::before {
    content: '•';
    position: absolute;
    left: -15px;
    color: #4285F4;
    font-weight: bold;
  }
  
  .number-emoji {
    margin-right: 8px;
  }
  
  /* 반응형 디자인을 위한 미디어 쿼리 */
  @media screen and (max-width: 600px) {
    .timeline-item {
      padding-left: 30px;
    }
    
    .timeline-content {
      padding: 15px;
    }
  }
&lt;/style&gt;
&lt;div class=&quot;roadmap-container&quot;&gt;
&lt;div class=&quot;timeline&quot;&gt;&lt;!-- 첫 번째 아티클 --&gt;
&lt;div class=&quot;timeline-item&quot;&gt;
&lt;div class=&quot;timeline-marker&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;timeline-content&quot;&gt;
&lt;div class=&quot;timeline-title&quot;&gt;&lt;span class=&quot;number-emoji&quot;&gt;1️⃣&lt;/span&gt;Google News URL 간단히 처리하기: 오픈소스 패키지 활용&lt;/div&gt;
&lt;ul class=&quot;timeline-points&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;복잡한 디코딩 과정 없이 원문 URL 해결하기&lt;/li&gt;
&lt;li&gt;병렬 처리로 대량 URL 처리 최적화 방법&lt;/li&gt;
&lt;li&gt;본문 추출 도구 적용 방법&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 두 번째 아티클 --&gt;
&lt;div class=&quot;timeline-item&quot;&gt;
&lt;div class=&quot;timeline-marker&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;timeline-content&quot;&gt;
&lt;div class=&quot;timeline-title&quot;&gt;&lt;span class=&quot;number-emoji&quot;&gt;2️⃣&lt;/span&gt;Playwright를 활용한 Google News 본문 추출 자동화&lt;/div&gt;
&lt;ul class=&quot;timeline-points&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Playwright vs Selenium 비교 및 Playwright 선택 이유&lt;/li&gt;
&lt;li&gt;효율적인 브라우저 자동화 설정&lt;/li&gt;
&lt;li&gt;Playwright의 비동기 처리를 통한 성능 향상&lt;/li&gt;
&lt;li&gt;본문 추출 도구 적용 방법&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 세 번째 아티클 --&gt;
&lt;div class=&quot;timeline-item&quot;&gt;
&lt;div class=&quot;timeline-marker&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;timeline-content&quot;&gt;
&lt;div class=&quot;timeline-title&quot;&gt;&lt;span class=&quot;number-emoji&quot;&gt;3️⃣&lt;/span&gt;두 방법의 실전 비교: 어떤 상황에 어떤 방법이 적합한가&lt;/div&gt;
&lt;ul class=&quot;timeline-points&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;속도, 정확도, 리소스 사용량 비교&lt;/li&gt;
&lt;li&gt;대규모 배치 처리와 실시간 처리의 차이&lt;/li&gt;
&lt;li&gt;통계적 유의미성을 위한 최소 표본 크기(300-500개 기사)&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;h2 data-ke-size=&quot;size26&quot;&gt; &lt;span&gt;마치며&lt;/span&gt;&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Google News RSS에서 수집된 링크는 원문 뉴스 URL이 아니며, &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;Google의 복잡한 리디렉션 구조를 따르는 중간 링크&lt;/b&gt;&lt;/span&gt;다. 이 링크를 이용해서 실제 뉴스 본문을 추출하기 위해서는 오픈소스 패키지를 활용한 URL 변환 방식과 브라우저 자동화 방식이라는 두 가지 접근법이 존재한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;다음 글에서는 첫 번째 방법인 오픈소스 패키지 활용 접근법에 대해 살펴보고, 코드로 구현하는 방법과 결과를 알아볼 예정이다.&lt;/p&gt;</description>
      <category>google 뉴스 rss</category>
      <category>기사수집자동화</category>
      <category>주식 재료 분석 자동화</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/15</guid>
      <comments>https://catalystmind.tistory.com/15#entry15comment</comments>
      <pubDate>Tue, 20 May 2025 20:25:38 +0900</pubDate>
    </item>
    <item>
      <title>Power Automate + feedparser 개선: Google 뉴스 수집 속도 단축하기</title>
      <link>https://catalystmind.tistory.com/14</link>
      <description>&lt;div id=&quot;code_1747469696279&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div style=&amp;quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&amp;quot;&amp;gt;
    &amp;lt;h1 style=&amp;quot;color: #1e40af; font-size: 24px; font-weight: 700; margin-top: 0; margin-bottom: 16px;&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
&amp;lt;div style=&amp;quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&amp;quot;&amp;gt;
    &amp;lt;ul style=&amp;quot;padding-left: 20px; margin: 0;&amp;quot;&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;기존 방식은 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Power Automate&amp;lt;/span&amp;gt;로 XML 파일 저장 후 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;feedparser&amp;lt;/span&amp;gt;로 처리함&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;개선 방식은 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;feedparser&amp;lt;/span&amp;gt;가 Google News RSS를 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;직접 파싱&amp;lt;/span&amp;gt;하도록 변경해 프로세스 단계 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;1개 제거&amp;lt;/span&amp;gt; 및 처리 시간 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;41.3%&amp;lt;/span&amp;gt; 단축함&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;한글/특수문자 URL 파싱 실패 문제는 Python &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;urllib.parse.quote()&amp;lt;/span&amp;gt; 함수로 URL 인코딩 적용해 해결함&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;이 코드는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;검색어 기반&amp;lt;/span&amp;gt; RSS URL 자동 생성 및 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;기사 제목/링크/발행일&amp;lt;/span&amp;gt; 추출 기능을 구현함&amp;lt;/li&amp;gt;
        &amp;lt;li style=&amp;quot;margin-bottom: 10px;&amp;quot;&amp;gt;이 코드는 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;Power Automate&amp;lt;/span&amp;gt;와 연동해 &amp;lt;span style=&amp;quot;color: #1e40af; font-weight: 600;&amp;quot;&amp;gt;완전 자동화&amp;lt;/span&amp;gt;된 뉴스 수집에 적용될 예정&amp;lt;/li&amp;gt;
    &amp;lt;/ul&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div style=&quot;background-color: #fff; border-radius: 8px; padding: 24px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); margin-bottom: 24px; border-left: 4px solid #3b82f6;&quot;&gt;
&lt;h1 style=&quot;color: #1e40af; font-size: 24px; font-weight: bold; margin-top: 0; margin-bottom: 16px;&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div style=&quot;background-color: #f0f7ff; padding: 12px 16px; border-radius: 6px; margin: 16px 0;&quot;&gt;
&lt;ul style=&quot;padding-left: 20px; margin: 0px; list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;기존 방식은 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Power Automate&lt;/span&gt;로 XML 파일 저장 후 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;feedparser&lt;/span&gt;로 처리함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;개선 방식은 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;feedparser&lt;/span&gt;가 Google News RSS를 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;직접 파싱&lt;/span&gt;하도록 변경해 프로세스 단계 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;1개 제거&lt;/span&gt; 및 처리 시간 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;41.3%&lt;/span&gt; 단축함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;한글/특수문자 URL 파싱 실패 문제는 Python &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;urllib.parse.quote()&lt;/span&gt; 함수로 URL 인코딩 적용해 해결함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;이 코드는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;검색어 기반&lt;/span&gt; RSS URL 자동 생성 및 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;기사 제목/링크/발행일&lt;/span&gt; 추출 기능을 구현함&lt;/li&gt;
&lt;li style=&quot;margin-bottom: 10px;&quot;&gt;이 코드는 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;Power Automate&lt;/span&gt;와 연동해 &lt;span style=&quot;color: #1e40af; font-weight: 600;&quot;&gt;완전 자동화&lt;/span&gt;된 뉴스 수집에 적용될 예정&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;앞에서 소개한 Power Automate를 활용한 방식은 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt; Power Automate로 xml 파일 10개를 저장하고 이를 다시 feedparser로 처리하는 방식&lt;/b&gt;&lt;/span&gt;이었다. Feedparser는 URL 주소를 입력하면, 직접 웹에서 정보를 가져와 처리할 수 있으므로, 불필요한 단계를 하나 더 제거하고자 코드를 수정하였다. 먼저, 결과는 다음과 같다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747407133990&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;style&amp;gt;
  /* 전체 컨테이너 */
  .process-comparison {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 100%;
    margin: 30px auto;
    color: #333;
    position: relative;
  }
  
  /* 프로세스 컨테이너 */
  .process-wrapper {
    display: flex;
    justify-content: space-between;
    margin-bottom: 30px;
    position: relative;
  }
  
  /* 구분선 */
  .divider {
    position: absolute;
    left: 50%;
    top: 0;
    bottom: 0;
    width: 1px;
    background-color: #ccc;
    transform: translateX(-50%);
  }
  
  /* 각 프로세스 흐름 컨테이너 */
  .process-column {
    width: 48%;
    position: relative;
    display: flex;
    flex-direction: column;
  }
  
  /* 프로세스 제목 - 기존 프로세스 */
  .process-header.original {
    background-color: #3f51b5;
    color: white;
    text-align: center;
    padding: 10px 0;
    font-weight: 600;
    border-radius: 4px;
    margin-bottom: 25px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }
  
  /* 프로세스 제목 - 개선된 프로세스 */
  .process-header.improved {
    background-color: #00897b;
    color: white;
    text-align: center;
    padding: 10px 0;
    font-weight: 600;
    border-radius: 4px;
    margin-bottom: 25px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }
  
  /* 단계 컨테이너 */
  .step-row {
    display: flex;
    align-items: center;
    margin-bottom: 20px;
    position: relative;
  }
  
  /* 단계 번호 박스 - 기존 프로세스 */
  .step-number.original {
    background-color: #3f51b5;
    color: white;
    width: 30px;
    height: 30px;
    border-radius: 0;  /* 사각형으로 변경 */
    display: flex;
    justify-content: center;
    align-items: center;
    font-weight: bold;
    margin-right: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    flex-shrink: 0;
    align-self: center;
  }
  
  /* 단계 번호 박스 - 개선된 프로세스 */
  .step-number.improved {
    background-color: #00897b;
    color: white;
    width: 30px;
    height: 30px;
    border-radius: 0;  /* 사각형으로 변경 */
    display: flex;
    justify-content: center;
    align-items: center;
    font-weight: bold;
    margin-right: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    flex-shrink: 0;
    align-self: center;
  }
  
  /* 단계 내용 박스 */
  .step-content {
    flex-grow: 1;
    border: 1px solid #e0e0e0;
    border-radius: 4px;
    padding: 12px;
    background-color: #fff;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
  }
  
  /* 단계 제목 - 기존 프로세스 */
  .step-title.original {
    font-weight: 600;
    margin-bottom: 5px;
    color: #3f51b5;
  }
  
  /* 단계 제목 - 개선된 프로세스 */
  .step-title.improved {
    font-weight: 600;
    margin-bottom: 5px;
    color: #00897b;
  }
  
  /* 도구 정보 */
  .step-tool {
    font-size: 13px;
    color: #555;
    margin-bottom: 3px;
  }
  
  /* 시간 정보 */
  .step-time {
    font-size: 13px;
    font-weight: 600;
    color: #e53935;
  }
  
  /* 총 소요시간 - 기존 프로세스 */
  .process-total.original {
    background-color: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-left: 4px solid #3f51b5;
    padding: 10px;
    text-align: center;
    font-weight: 600;
    border-radius: 4px;
    margin-top: auto;
  }
  
  /* 총 소요시간 - 개선된 프로세스 */
  .process-total.improved {
    background-color: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-left: 4px solid #00897b;
    padding: 10px;
    text-align: center;
    font-weight: 600;
    border-radius: 4px;
    margin-top: auto;
  }
  
  /* 시간 강조 */
  .time-highlight {
    color: #e53935;
  }
  
  /* 개선 효과 요약 */
  .improvement-summary {
    background-color: #e8f5e9;
    border: 1px solid #c8e6c9;
    border-left: 4px solid #00897b;
    padding: 12px;
    text-align: center;
    font-weight: 600;
    color: #00695c;
    margin-top: 20px;
    border-radius: 4px;
  }
  
  /* 스페이서 - 빈 공간 유지용 */
  .spacer {
    height: 20px;
  }
&amp;lt;/style&amp;gt;

&amp;lt;div class=&amp;quot;process-comparison&amp;quot;&amp;gt;
  &amp;lt;div class=&amp;quot;process-wrapper&amp;quot;&amp;gt;
    &amp;lt;!-- 기존 프로세스 --&amp;gt;
    &amp;lt;div class=&amp;quot;process-column&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;process-header original&amp;quot;&amp;gt;기존 프로세스&amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 단계 1 --&amp;gt;
      &amp;lt;div class=&amp;quot;step-row&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-number original&amp;quot;&amp;gt;1&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;step-title original&amp;quot;&amp;gt;KRX 거래량 상위 종목 추출&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-tool&amp;quot;&amp;gt;사용 도구: PAD&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-time&amp;quot;&amp;gt;소요 시간: 12초&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 단계 2 --&amp;gt;
      &amp;lt;div class=&amp;quot;step-row&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-number original&amp;quot;&amp;gt;2&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;step-title original&amp;quot;&amp;gt;XML 파일 저장 10개&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-tool&amp;quot;&amp;gt;사용 도구: PAD&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-time&amp;quot;&amp;gt;소요 시간: 65초&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 단계 3 --&amp;gt;
      &amp;lt;div class=&amp;quot;step-row&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-number original&amp;quot;&amp;gt;3&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;step-title original&amp;quot;&amp;gt;구글 URL 추출&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-tool&amp;quot;&amp;gt;사용 도구: feedparser&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-time&amp;quot;&amp;gt;소요 시간: 3초&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 총 소요시간 --&amp;gt;
      &amp;lt;div class=&amp;quot;process-total original&amp;quot;&amp;gt;
        총 소요시간: &amp;lt;span class=&amp;quot;time-highlight&amp;quot;&amp;gt;80초&amp;lt;/span&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
    
    &amp;lt;!-- 구분선 --&amp;gt;
    &amp;lt;div class=&amp;quot;divider&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
    
    &amp;lt;!-- 개선된 프로세스 --&amp;gt;
    &amp;lt;div class=&amp;quot;process-column&amp;quot;&amp;gt;
      &amp;lt;div class=&amp;quot;process-header improved&amp;quot;&amp;gt;개선된 프로세스&amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 단계 1 --&amp;gt;
      &amp;lt;div class=&amp;quot;step-row&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-number improved&amp;quot;&amp;gt;1&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;step-title improved&amp;quot;&amp;gt;KRX 거래량 상위 종목 추출&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-tool&amp;quot;&amp;gt;사용 도구: PAD&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-time&amp;quot;&amp;gt;소요 시간: 12초&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 단계 2 --&amp;gt;
      &amp;lt;div class=&amp;quot;step-row&amp;quot;&amp;gt;
        &amp;lt;div class=&amp;quot;step-number improved&amp;quot;&amp;gt;2&amp;lt;/div&amp;gt;
        &amp;lt;div class=&amp;quot;step-content&amp;quot;&amp;gt;
          &amp;lt;div class=&amp;quot;step-title improved&amp;quot;&amp;gt;웹에서 직접 구글 URL 추출 10개&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-tool&amp;quot;&amp;gt;사용 도구: feedparser&amp;lt;/div&amp;gt;
          &amp;lt;div class=&amp;quot;step-time&amp;quot;&amp;gt;소요 시간: 35초&amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
      &amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 빈 공간 유지 - 오른쪽 열에 동일한 여백 유지를 위해 --&amp;gt;
      &amp;lt;div class=&amp;quot;spacer&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;
      
      &amp;lt;!-- 총 소요시간 --&amp;gt;
      &amp;lt;div class=&amp;quot;process-total improved&amp;quot;&amp;gt;
        총 소요시간: &amp;lt;span class=&amp;quot;time-highlight&amp;quot;&amp;gt;47초&amp;lt;/span&amp;gt;
      &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
  
  &amp;lt;!-- 개선 효과 요약 --&amp;gt;
  &amp;lt;div class=&amp;quot;improvement-summary&amp;quot;&amp;gt;
    프로세스 개선 효과: 시간 33초 절약 (41.3%), 단계 1개 감소, PAD 사용 1회 감소
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;style&gt;
  /* 전체 컨테이너 */
  .process-comparison {
    font-family: 'Noto Sans KR', sans-serif;
    max-width: 100%;
    margin: 30px auto;
    color: #333;
    position: relative;
  }
  
  /* 프로세스 컨테이너 */
  .process-wrapper {
    display: flex;
    justify-content: space-between;
    margin-bottom: 30px;
    position: relative;
  }
  
  /* 구분선 */
  .divider {
    position: absolute;
    left: 50%;
    top: 0;
    bottom: 0;
    width: 1px;
    background-color: #ccc;
    transform: translateX(-50%);
  }
  
  /* 각 프로세스 흐름 컨테이너 */
  .process-column {
    width: 48%;
    position: relative;
    display: flex;
    flex-direction: column;
  }
  
  /* 프로세스 제목 - 기존 프로세스 */
  .process-header.original {
    background-color: #3f51b5;
    color: white;
    text-align: center;
    padding: 10px 0;
    font-weight: 600;
    border-radius: 4px;
    margin-bottom: 25px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }
  
  /* 프로세스 제목 - 개선된 프로세스 */
  .process-header.improved {
    background-color: #00897b;
    color: white;
    text-align: center;
    padding: 10px 0;
    font-weight: 600;
    border-radius: 4px;
    margin-bottom: 25px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }
  
  /* 단계 컨테이너 */
  .step-row {
    display: flex;
    align-items: center;
    margin-bottom: 20px;
    position: relative;
  }
  
  /* 단계 번호 박스 - 기존 프로세스 */
  .step-number.original {
    background-color: #3f51b5;
    color: white;
    width: 30px;
    height: 30px;
    border-radius: 0;  /* 사각형으로 변경 */
    display: flex;
    justify-content: center;
    align-items: center;
    font-weight: bold;
    margin-right: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    flex-shrink: 0;
    align-self: center;
  }
  
  /* 단계 번호 박스 - 개선된 프로세스 */
  .step-number.improved {
    background-color: #00897b;
    color: white;
    width: 30px;
    height: 30px;
    border-radius: 0;  /* 사각형으로 변경 */
    display: flex;
    justify-content: center;
    align-items: center;
    font-weight: bold;
    margin-right: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    flex-shrink: 0;
    align-self: center;
  }
  
  /* 단계 내용 박스 */
  .step-content {
    flex-grow: 1;
    border: 1px solid #e0e0e0;
    border-radius: 4px;
    padding: 12px;
    background-color: #fff;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
  }
  
  /* 단계 제목 - 기존 프로세스 */
  .step-title.original {
    font-weight: 600;
    margin-bottom: 5px;
    color: #3f51b5;
  }
  
  /* 단계 제목 - 개선된 프로세스 */
  .step-title.improved {
    font-weight: 600;
    margin-bottom: 5px;
    color: #00897b;
  }
  
  /* 도구 정보 */
  .step-tool {
    font-size: 13px;
    color: #555;
    margin-bottom: 3px;
  }
  
  /* 시간 정보 */
  .step-time {
    font-size: 13px;
    font-weight: 600;
    color: #e53935;
  }
  
  /* 총 소요시간 - 기존 프로세스 */
  .process-total.original {
    background-color: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-left: 4px solid #3f51b5;
    padding: 10px;
    text-align: center;
    font-weight: 600;
    border-radius: 4px;
    margin-top: auto;
  }
  
  /* 총 소요시간 - 개선된 프로세스 */
  .process-total.improved {
    background-color: #f5f5f5;
    border: 1px solid #e0e0e0;
    border-left: 4px solid #00897b;
    padding: 10px;
    text-align: center;
    font-weight: 600;
    border-radius: 4px;
    margin-top: auto;
  }
  
  /* 시간 강조 */
  .time-highlight {
    color: #e53935;
  }
  
  /* 개선 효과 요약 */
  .improvement-summary {
    background-color: #e8f5e9;
    border: 1px solid #c8e6c9;
    border-left: 4px solid #00897b;
    padding: 12px;
    text-align: center;
    font-weight: 600;
    color: #00695c;
    margin-top: 20px;
    border-radius: 4px;
  }
  
  /* 스페이서 - 빈 공간 유지용 */
  .spacer {
    height: 20px;
  }
&lt;/style&gt;
&lt;div class=&quot;process-comparison&quot;&gt;
&lt;div class=&quot;process-wrapper&quot;&gt;&lt;!-- 기존 프로세스 --&gt;
&lt;div class=&quot;process-column&quot;&gt;
&lt;div class=&quot;process-header original&quot;&gt;기존 프로세스&lt;/div&gt;
&lt;!-- 단계 1 --&gt;
&lt;div class=&quot;step-row&quot;&gt;
&lt;div class=&quot;step-number original&quot;&gt;1&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title original&quot;&gt;KRX 거래량 상위 종목 추출&lt;/div&gt;
&lt;div class=&quot;step-tool&quot;&gt;사용 도구: PAD&lt;/div&gt;
&lt;div class=&quot;step-time&quot;&gt;소요 시간: 12초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 단계 2 --&gt;
&lt;div class=&quot;step-row&quot;&gt;
&lt;div class=&quot;step-number original&quot;&gt;2&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title original&quot;&gt;XML 파일 저장 10개&lt;/div&gt;
&lt;div class=&quot;step-tool&quot;&gt;사용 도구: PAD&lt;/div&gt;
&lt;div class=&quot;step-time&quot;&gt;소요 시간: 65초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 단계 3 --&gt;
&lt;div class=&quot;step-row&quot;&gt;
&lt;div class=&quot;step-number original&quot;&gt;3&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title original&quot;&gt;구글 URL 추출&lt;/div&gt;
&lt;div class=&quot;step-tool&quot;&gt;사용 도구: feedparser&lt;/div&gt;
&lt;div class=&quot;step-time&quot;&gt;소요 시간: 3초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 총 소요시간 --&gt;
&lt;div class=&quot;process-total original&quot;&gt;총 소요시간: &lt;span class=&quot;time-highlight&quot;&gt;80초&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 구분선 --&gt;
&lt;div class=&quot;divider&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;!-- 개선된 프로세스 --&gt;
&lt;div class=&quot;process-column&quot;&gt;
&lt;div class=&quot;process-header improved&quot;&gt;개선된 프로세스&lt;/div&gt;
&lt;!-- 단계 1 --&gt;
&lt;div class=&quot;step-row&quot;&gt;
&lt;div class=&quot;step-number improved&quot;&gt;1&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title improved&quot;&gt;KRX 거래량 상위 종목 추출&lt;/div&gt;
&lt;div class=&quot;step-tool&quot;&gt;사용 도구: PAD&lt;/div&gt;
&lt;div class=&quot;step-time&quot;&gt;소요 시간: 12초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 단계 2 --&gt;
&lt;div class=&quot;step-row&quot;&gt;
&lt;div class=&quot;step-number improved&quot;&gt;2&lt;/div&gt;
&lt;div class=&quot;step-content&quot;&gt;
&lt;div class=&quot;step-title improved&quot;&gt;웹에서 직접 구글 URL 추출 10개&lt;/div&gt;
&lt;div class=&quot;step-tool&quot;&gt;사용 도구: feedparser&lt;/div&gt;
&lt;div class=&quot;step-time&quot;&gt;소요 시간: 35초&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 빈 공간 유지 - 오른쪽 열에 동일한 여백 유지를 위해 --&gt;
&lt;div class=&quot;spacer&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;!-- 총 소요시간 --&gt;
&lt;div class=&quot;process-total improved&quot;&gt;총 소요시간: &lt;span class=&quot;time-highlight&quot;&gt;47초&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- 개선 효과 요약 --&gt;
&lt;div class=&quot;improvement-summary&quot;&gt;프로세스 개선 효과: 시간 33초 절약 (41.3%), 단계 1개 감소, PAD 사용 1회 감소&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;h3 data-start=&quot;166&quot; data-end=&quot;208&quot; data-ke-size=&quot;size23&quot;&gt;feedparser 사용 시 URL 인코딩의 중요성 - urllib.parse&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;feedparser는 URL을 직접 입력받아 웹에서 RSS 정보를 실시간으로 가져와 처리할 수 있지만, URL을 입력할 때는 특별한 주의가 필요하다. 우리가 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;브라우저 주소창에서 보는 URL과 실제로 서버에 전달되는 URL은 다르기&lt;/span&gt;&lt;/b&gt; 때문이다. &lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;예를 들어, 구글 뉴스에서&lt;b&gt; &lt;span style=&quot;color: #006dd7;&quot;&gt;&quot;에코프로&quot;&lt;/span&gt;&lt;/b&gt;를 검색하면 브라우저 주소창에는 우리가 읽을 수 있는 형태로 다음과 같이 표시된다.&lt;/p&gt;
&lt;pre class=&quot;sas&quot;&gt;&lt;code&gt;https://news.google.com/search?q=&quot;에코프로&quot;&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako
&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만, 이 URL을 복사하여 다른 곳에 붙여넣어 보면 실제로는 다음과 같이 한글과 따옴표가 퍼센트 인코딩된 문자로 변환되어 있음을 확인할 수 있다.&lt;/p&gt;
&lt;pre class=&quot;python&quot; data-ke-language=&quot;python&quot;&gt;&lt;code&gt;https://news.google.com/search?q=%22%EC%97%90%EC%BD%94%ED%94%84%EB%A1%9C%22&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako&lt;/code&gt;&lt;/pre&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;브라우저는 이러한 URL 변환 과정을 자동으로 처리하지만, feedparser에는 그런 기능이 없다. 따라서, 이 문제를 해결하기 위해 Python의 urllib.parse 모듈을 사용해야 하며, 이 모듈은 URL의 한글이나 특수문자를 올바르게 인코딩하여 웹 요청에 적합한 형태로 변환해준다. &lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style1&quot; /&gt;
&lt;h3 data-end=&quot;208&quot; data-start=&quot;166&quot; data-ke-size=&quot;size23&quot;&gt;urllib.parse&amp;nbsp;사용 예시&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;pre id=&quot;code_1747452106385&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import urllib.parse

def get_rss_news_url(search_term):
    
    query = f'&quot;{search_term}&quot;'
           
    # URL 인코딩
    encoded_query = urllib.parse.quote(query)
    rss_url = f'https://news.google.com/rss/search?q={encoded_query}&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako'
    return rss_url

def main():
    # 검색어 입력 받기
    search_term = input(&quot;검색어를 입력하세요: &quot;)
    
    # URL 생성
    news_rss_url = get_rss_news_url(search_term)
    
    # 결과 출력
    print(f&quot;\n생성된 Google News rss URL: {news_rss_url}&quot;)    

if __name__ == &quot;__main__&quot;:
    main()&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1747451303963&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;검색어를 입력하세요:  에코프로

생성된 Google News rss URL: https://news.google.com/rss/search?q=%22%EC%97%90%EC%BD%94%ED%94%84%EB%A1%9C%22&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako&lt;/code&gt;&lt;/pre&gt;
&lt;h3 data-end=&quot;208&quot; data-start=&quot;166&quot; data-ke-size=&quot;size23&quot;&gt;urllib.parse와 feedparser 결합&lt;/h3&gt;
&lt;pre id=&quot;code_1747451687491&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;import urllib.parse
import time
import feedparser

def get_rss_url(search_term):
    encoded_search_term = urllib.parse.quote(f'&quot;{search_term}&quot;')
    rss_url = f'https://news.google.com/rss/search?q={encoded_search_term}&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako'
    return rss_url

def main():
    search_term = input(&quot;검색어를 입력하세요: &quot;)
       
    # RSS 피드 URL 생성 (Google News는 검색결과를 RSS로도 제공)
    rss_url = get_rss_url(search_term)
    print(f&quot;RSS URL: {rss_url}&quot;)
    
    # 잠시 대기
    time.sleep(2)
    
    # RSS 피드 파싱
    print(&quot;\nRSS 피드 파싱 중...&quot;)
    feed = feedparser.parse(rss_url)
       
    # 모든 기사 출력 (요약 없음)
    print(&quot;\n모든 기사:&quot;)
    for i, entry in enumerate(feed.entries, 1):
        print(f&quot;\n{i}. {entry.title}&quot;)
        print(f&quot;   링크: {entry.link}&quot;)
        print(f&quot;   발행일: {entry.published if 'published' in entry else '날짜 정보 없음'}&quot;)

if __name__ == &quot;__main__&quot;:
    main()&lt;/code&gt;&lt;/pre&gt;
&lt;pre id=&quot;code_1747452266494&quot; class=&quot;python&quot; data-ke-language=&quot;python&quot; data-ke-type=&quot;codeblock&quot;&gt;&lt;code&gt;검색어를 입력하세요:  에코프로
RSS URL: https://news.google.com/rss/search?q=%22%EC%97%90%EC%BD%94%ED%94%84%EB%A1%9C%22&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako

RSS 피드 파싱 중...

모든 기사:

1. [직장 돋보기 분석] 에코프로, 평균연봉 9500만원인 친환경 기술 기업...고연봉이지만 평균 근속연수는 3.8년 - 뉴스투데이
   링크: https://news.google.com/rss/articles/CBMiXkFVX3lxTE9reHNJcWMyNU1zdmNkN09qa1g1R1VmYW1Ic0VOa2xYZWR2RTRVOXZuVGpRVmZWMjU1OFdyRkdwRDJ6WXB2MWtkRl9USmxiMmdEaWpXdWs5N1g0bDdPTXc?oc=5
   발행일: Sat, 17 May 2025 03:00:00 GMT

2. 에코프로머티, 전구체 설비 계약 일정 조정&amp;hellip;&quot;투자 계획 변동 없어&quot; - 전자부품 전문 미디어 디일렉
   링크: https://news.google.com/rss/articles/CBMiZkFVX3lxTFBKTmc0NGVHNFc4QmJybS1zLV9kUExFTE5BMHJjQXU3Y1BQa1JYdU9HLU8zNDFtdFRHZHdKa1VvZ2NybmlncHVvNDlCMGE2WVpCZWRfbXNrVEg4NkYwdWVyUFkyam84dw?oc=5
   발행일: Thu, 17 Apr 2025 07:00:00 GMT&lt;/code&gt;&lt;/pre&gt;
&lt;h3 data-ke-size=&quot;size23&quot; data-start=&quot;115&quot; data-end=&quot;133&quot;&gt;최종 파이썬 코드 구성&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아래의 파이썬 코드는 이전 글에서 작성한, 파일 단위로 저장된 XML을 처리하고 결과를 CSV로 정리하는 &lt;b&gt;파이썬 스크립트&lt;/b&gt;를 XML 파일 대신 검색어로 만든 RSS URL을 직접 읽도록 수정한 것이며, 대부분의 중요한 기능은 동일하다.&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747467430248&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div class=&amp;quot;notice-area&amp;quot;&amp;gt;
  &amp;lt;details&amp;gt;
    &amp;lt;summary style=&amp;quot;cursor: pointer; font-weight: bold; padding: 10px; background-color: #f0f0f0; border: 1px solid #ddd; border-radius: 4px; color: #333; position: relative;&amp;quot;&amp;gt;
      &amp;lt;span style=&amp;quot;display: inline-block; vertical-align: middle;&amp;quot;&amp;gt;  Google News RSS feedparser&amp;lt;/span&amp;gt;
      &amp;lt;span style=&amp;quot;position: absolute; right: 10px; font-weight: normal; font-size: 0.9em;&amp;quot;&amp;gt;[펼치기/접기]&amp;lt;/span&amp;gt;
    &amp;lt;/summary&amp;gt;
    &amp;lt;div style=&amp;quot;padding: 15px; background-color: #f5f5f5; border: 1px solid #ddd; border-top: none; border-radius: 0 0 4px 4px; margin-top: -1px;&amp;quot;&amp;gt;
      &amp;lt;pre style=&amp;quot;background-color: #f5f5f5; padding: 15px; border-radius: 4px; overflow-x: auto; font-family: 'D2Coding', 'Consolas', 'Monaco', 'Courier New', monospace; line-height: 1.5;&amp;quot;&amp;gt;
&amp;lt;code&amp;gt;&amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# feedparser는 url을 바로 입력해도 처리함.
# 시간변환함수 추가, 
# 파일로 저장 기능 추가
# 사용방법: python google_feed_parser_url.py {search_term} output_path&amp;lt;/span&amp;gt;

&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; os
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; sys
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; urllib.parse
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; feedparser
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; datetime &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; datetime, timezone, timedelta &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 날짜 형식 변환&amp;lt;/span&amp;gt;
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; email.utils &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; parsedate_to_datetime &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 날짜 형식 변환&amp;lt;/span&amp;gt;
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; csv &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 파일로 저장 기능 추가&amp;lt;/span&amp;gt;

&amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 한국 시간대 (UTC+9)&amp;lt;/span&amp;gt;
KST = timezone(timedelta(hours=9))

&amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 날짜 형식 변환 함수 - 한국 시간으로 변환&amp;lt;/span&amp;gt;
&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #880000;&amp;quot;&amp;gt;format_date_to_kst&amp;lt;/span&amp;gt;(date_str):
    &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# RSS 날짜 형식(RFC 822)을 파싱&amp;lt;/span&amp;gt;
        dt = parsedate_to_datetime(date_str)
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# UTC에서 KST로 변환&amp;lt;/span&amp;gt;
        dt_kst = dt.astimezone(KST)
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 연도, 월, 날짜, 시간(24시간) 형식으로 변환&amp;lt;/span&amp;gt;
        &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; dt_kst.strftime(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'%Y-%m-%d %H:%M:%S'&amp;lt;/span&amp;gt;)
    &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception:
        &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# ISO 8601 형식 시도&amp;lt;/span&amp;gt;
            dt = datetime.fromisoformat(date_str.replace(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'Z'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'+00:00'&amp;lt;/span&amp;gt;))
            &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# UTC에서 KST로 변환&amp;lt;/span&amp;gt;
            dt_kst = dt.astimezone(KST)
            &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; dt_kst.strftime(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'%Y-%m-%d %H:%M:%S'&amp;lt;/span&amp;gt;)
        &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt;:
            &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 변환 실패 시 원본 반환&amp;lt;/span&amp;gt;
            &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; date_str

&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #880000;&amp;quot;&amp;gt;get_rss_url&amp;lt;/span&amp;gt;(search_term):
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# search_term을 그대로 인코딩 (이미 날짜 범위 등 포함)&amp;lt;/span&amp;gt;
    encoded_search_term = urllib.parse.quote(search_term)
    rss_url = &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;f'https://news.google.com/rss/search?q={encoded_search_term}&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako'&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; rss_url

&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #880000;&amp;quot;&amp;gt;main&amp;lt;/span&amp;gt;():
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 명령줄 인자 처리&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; len(sys.argv) != 3:
        print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;사용방법: python foo.py \&amp;quot;search_term\&amp;quot; output_path&amp;quot;&amp;lt;/span&amp;gt;)
        print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;예시: python foo.py \&amp;quot;\\\&amp;quot;포스코\\\&amp;quot; after:2025-05-15 before:2025-05-17\&amp;quot; output.csv&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(1)
    
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 명령줄에서 인자 받기&amp;lt;/span&amp;gt;
    search_term = sys.argv[1]
    output_path = sys.argv[2]
    
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# RSS 피드 URL 생성 (Google News는 검색결과를 RSS로도 제공)&amp;lt;/span&amp;gt;
    rss_url = get_rss_url(search_term)
    print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;f&amp;quot;검색어: {search_term}&amp;quot;&amp;lt;/span&amp;gt;)
    print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;f&amp;quot;RSS URL: {rss_url}&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# RSS 피드 파싱&amp;lt;/span&amp;gt;
    print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;\nRSS 피드 파싱 중...&amp;quot;&amp;lt;/span&amp;gt;)
    feed = feedparser.parse(rss_url)
    
    &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# CSV 파일로 데이터 저장&amp;lt;/span&amp;gt;
    &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; open(output_path, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'w'&amp;lt;/span&amp;gt;, newline=&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;''&amp;lt;/span&amp;gt;, encoding=&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; csvfile:
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# CSV 작성자 생성&amp;lt;/span&amp;gt;
        csv_writer = csv.writer(csvfile)       
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 헤더 작성&amp;lt;/span&amp;gt;
        csv_writer.writerow([&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'pubtime'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'title'&amp;lt;/span&amp;gt;, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;])
        
        &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# 모든 기사 출력 (요약 없음)&amp;lt;/span&amp;gt;
        print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;\n모든 기사:&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; i, entry &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; enumerate(feed.entries, 1):           
            title = entry.title &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; hasattr(entry, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'title'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;제목 없음&amp;quot;&amp;lt;/span&amp;gt;
            link = entry.link &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; hasattr(entry, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;링크 없음&amp;quot;&amp;lt;/span&amp;gt;        
            published = format_date_to_kst(entry.published) &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; hasattr(entry, &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;'published'&amp;lt;/span&amp;gt;) &amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;발행일 없음&amp;quot;&amp;lt;/span&amp;gt;
                 
            &amp;lt;span style=&amp;quot;color: #888888;&amp;quot;&amp;gt;# CSV 파일에 데이터 저장&amp;lt;/span&amp;gt;
            csv_writer.writerow([published, title, link])
    
    print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;f&amp;quot;CSV 파일이 성공적으로 저장되었습니다: {output_path}&amp;quot;&amp;lt;/span&amp;gt;)
    print(&amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;f&amp;quot;총 {len(feed.entries)}개 항목이 저장되었습니다.&amp;quot;&amp;lt;/span&amp;gt;)

&amp;lt;span style=&amp;quot;color: #0000ff;&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span style=&amp;quot;color: #008800;&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    main()&amp;lt;/code&amp;gt;
      &amp;lt;/pre&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/details&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div class=&quot;notice-area&quot;&gt;&lt;details&gt;
&lt;summary style=&quot;cursor: pointer; font-weight: bold; padding: 10px; background-color: #f0f0f0; border: 1px solid #ddd; border-radius: 4px; color: #333; position: relative;&quot;&gt;&lt;span style=&quot;display: inline-block; vertical-align: middle;&quot;&gt;  Google News RSS feedparser&lt;/span&gt; &lt;span style=&quot;position: absolute; right: 10px; font-weight: normal; font-size: 0.9em;&quot;&gt;[펼치기/접기]&lt;/span&gt;&lt;/summary&gt;
&lt;div style=&quot;padding: 15px; background-color: #f5f5f5; border: 1px solid #ddd; border-top: none; border-radius: 0 0 4px 4px; margin-top: -1px;&quot;&gt;
&lt;pre style=&quot;background-color: #f5f5f5; padding: 15px; border-radius: 4px; overflow-x: auto; font-family: 'D2Coding', 'Consolas', 'Monaco', 'Courier New', monospace; line-height: 1.5;&quot;&gt;&lt;code&gt;&lt;span style=&quot;color: #888888;&quot;&gt;# feedparser는 url을 바로 입력해도 처리함.
# 시간변환함수 추가, 
# 파일로 저장 기능 추가
# 사용방법: python google_feed_parser_url.py {search_term} output_path&lt;/span&gt;

&lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; os
&lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; sys
&lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; urllib.parse
&lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; feedparser
&lt;span style=&quot;color: #0000ff;&quot;&gt;from&lt;/span&gt; datetime &lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; datetime, timezone, timedelta &lt;span style=&quot;color: #888888;&quot;&gt;# 날짜 형식 변환&lt;/span&gt;
&lt;span style=&quot;color: #0000ff;&quot;&gt;from&lt;/span&gt; email.utils &lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; parsedate_to_datetime &lt;span style=&quot;color: #888888;&quot;&gt;# 날짜 형식 변환&lt;/span&gt;
&lt;span style=&quot;color: #0000ff;&quot;&gt;import&lt;/span&gt; csv &lt;span style=&quot;color: #888888;&quot;&gt;# 파일로 저장 기능 추가&lt;/span&gt;

&lt;span style=&quot;color: #888888;&quot;&gt;# 한국 시간대 (UTC+9)&lt;/span&gt;
KST = timezone(timedelta(hours=9))

&lt;span style=&quot;color: #888888;&quot;&gt;# 날짜 형식 변환 함수 - 한국 시간으로 변환&lt;/span&gt;
&lt;span style=&quot;color: #0000ff;&quot;&gt;def&lt;/span&gt; &lt;span style=&quot;color: #880000;&quot;&gt;format_date_to_kst&lt;/span&gt;(date_str):
    &lt;span style=&quot;color: #0000ff;&quot;&gt;try&lt;/span&gt;:
        &lt;span style=&quot;color: #888888;&quot;&gt;# RSS 날짜 형식(RFC 822)을 파싱&lt;/span&gt;
        dt = parsedate_to_datetime(date_str)
        &lt;span style=&quot;color: #888888;&quot;&gt;# UTC에서 KST로 변환&lt;/span&gt;
        dt_kst = dt.astimezone(KST)
        &lt;span style=&quot;color: #888888;&quot;&gt;# 연도, 월, 날짜, 시간(24시간) 형식으로 변환&lt;/span&gt;
        &lt;span style=&quot;color: #0000ff;&quot;&gt;return&lt;/span&gt; dt_kst.strftime(&lt;span style=&quot;color: #008800;&quot;&gt;'%Y-%m-%d %H:%M:%S'&lt;/span&gt;)
    &lt;span style=&quot;color: #0000ff;&quot;&gt;except&lt;/span&gt; Exception:
        &lt;span style=&quot;color: #0000ff;&quot;&gt;try&lt;/span&gt;:
            &lt;span style=&quot;color: #888888;&quot;&gt;# ISO 8601 형식 시도&lt;/span&gt;
            dt = datetime.fromisoformat(date_str.replace(&lt;span style=&quot;color: #008800;&quot;&gt;'Z'&lt;/span&gt;, &lt;span style=&quot;color: #008800;&quot;&gt;'+00:00'&lt;/span&gt;))
            &lt;span style=&quot;color: #888888;&quot;&gt;# UTC에서 KST로 변환&lt;/span&gt;
            dt_kst = dt.astimezone(KST)
            &lt;span style=&quot;color: #0000ff;&quot;&gt;return&lt;/span&gt; dt_kst.strftime(&lt;span style=&quot;color: #008800;&quot;&gt;'%Y-%m-%d %H:%M:%S'&lt;/span&gt;)
        &lt;span style=&quot;color: #0000ff;&quot;&gt;except&lt;/span&gt;:
            &lt;span style=&quot;color: #888888;&quot;&gt;# 변환 실패 시 원본 반환&lt;/span&gt;
            &lt;span style=&quot;color: #0000ff;&quot;&gt;return&lt;/span&gt; date_str

&lt;span style=&quot;color: #0000ff;&quot;&gt;def&lt;/span&gt; &lt;span style=&quot;color: #880000;&quot;&gt;get_rss_url&lt;/span&gt;(search_term):
    &lt;span style=&quot;color: #888888;&quot;&gt;# search_term을 그대로 인코딩 (이미 날짜 범위 등 포함)&lt;/span&gt;
    encoded_search_term = urllib.parse.quote(search_term)
    rss_url = &lt;span style=&quot;color: #008800;&quot;&gt;f'https://news.google.com/rss/search?q={encoded_search_term}&amp;amp;hl=ko&amp;amp;gl=KR&amp;amp;ceid=KR%3Ako'&lt;/span&gt;
    &lt;span style=&quot;color: #0000ff;&quot;&gt;return&lt;/span&gt; rss_url

&lt;span style=&quot;color: #0000ff;&quot;&gt;def&lt;/span&gt; &lt;span style=&quot;color: #880000;&quot;&gt;main&lt;/span&gt;():
    &lt;span style=&quot;color: #888888;&quot;&gt;# 명령줄 인자 처리&lt;/span&gt;
    &lt;span style=&quot;color: #0000ff;&quot;&gt;if&lt;/span&gt; len(sys.argv) != 3:
        print(&lt;span style=&quot;color: #008800;&quot;&gt;&quot;사용방법: python foo.py \&quot;search_term\&quot; output_path&quot;&lt;/span&gt;)
        print(&lt;span style=&quot;color: #008800;&quot;&gt;&quot;예시: python foo.py \&quot;\\\&quot;포스코\\\&quot; after:2025-05-15 before:2025-05-17\&quot; output.csv&quot;&lt;/span&gt;)
        sys.exit(1)
    
    &lt;span style=&quot;color: #888888;&quot;&gt;# 명령줄에서 인자 받기&lt;/span&gt;
    search_term = sys.argv[1]
    output_path = sys.argv[2]
    
    &lt;span style=&quot;color: #888888;&quot;&gt;# RSS 피드 URL 생성 (Google News는 검색결과를 RSS로도 제공)&lt;/span&gt;
    rss_url = get_rss_url(search_term)
    print(&lt;span style=&quot;color: #008800;&quot;&gt;f&quot;검색어: {search_term}&quot;&lt;/span&gt;)
    print(&lt;span style=&quot;color: #008800;&quot;&gt;f&quot;RSS URL: {rss_url}&quot;&lt;/span&gt;)
    
    &lt;span style=&quot;color: #888888;&quot;&gt;# RSS 피드 파싱&lt;/span&gt;
    print(&lt;span style=&quot;color: #008800;&quot;&gt;&quot;\nRSS 피드 파싱 중...&quot;&lt;/span&gt;)
    feed = feedparser.parse(rss_url)
    
    &lt;span style=&quot;color: #888888;&quot;&gt;# CSV 파일로 데이터 저장&lt;/span&gt;
    &lt;span style=&quot;color: #0000ff;&quot;&gt;with&lt;/span&gt; open(output_path, &lt;span style=&quot;color: #008800;&quot;&gt;'w'&lt;/span&gt;, newline=&lt;span style=&quot;color: #008800;&quot;&gt;''&lt;/span&gt;, encoding=&lt;span style=&quot;color: #008800;&quot;&gt;'utf-8'&lt;/span&gt;) &lt;span style=&quot;color: #0000ff;&quot;&gt;as&lt;/span&gt; csvfile:
        &lt;span style=&quot;color: #888888;&quot;&gt;# CSV 작성자 생성&lt;/span&gt;
        csv_writer = csv.writer(csvfile)       
        &lt;span style=&quot;color: #888888;&quot;&gt;# 헤더 작성&lt;/span&gt;
        csv_writer.writerow([&lt;span style=&quot;color: #008800;&quot;&gt;'pubtime'&lt;/span&gt;, &lt;span style=&quot;color: #008800;&quot;&gt;'title'&lt;/span&gt;, &lt;span style=&quot;color: #008800;&quot;&gt;'link'&lt;/span&gt;])
        
        &lt;span style=&quot;color: #888888;&quot;&gt;# 모든 기사 출력 (요약 없음)&lt;/span&gt;
        print(&lt;span style=&quot;color: #008800;&quot;&gt;&quot;\n모든 기사:&quot;&lt;/span&gt;)
        &lt;span style=&quot;color: #0000ff;&quot;&gt;for&lt;/span&gt; i, entry &lt;span style=&quot;color: #0000ff;&quot;&gt;in&lt;/span&gt; enumerate(feed.entries, 1):           
            title = entry.title &lt;span style=&quot;color: #0000ff;&quot;&gt;if&lt;/span&gt; hasattr(entry, &lt;span style=&quot;color: #008800;&quot;&gt;'title'&lt;/span&gt;) &lt;span style=&quot;color: #0000ff;&quot;&gt;else&lt;/span&gt; &lt;span style=&quot;color: #008800;&quot;&gt;&quot;제목 없음&quot;&lt;/span&gt;
            link = entry.link &lt;span style=&quot;color: #0000ff;&quot;&gt;if&lt;/span&gt; hasattr(entry, &lt;span style=&quot;color: #008800;&quot;&gt;'link'&lt;/span&gt;) &lt;span style=&quot;color: #0000ff;&quot;&gt;else&lt;/span&gt; &lt;span style=&quot;color: #008800;&quot;&gt;&quot;링크 없음&quot;&lt;/span&gt;        
            published = format_date_to_kst(entry.published) &lt;span style=&quot;color: #0000ff;&quot;&gt;if&lt;/span&gt; hasattr(entry, &lt;span style=&quot;color: #008800;&quot;&gt;'published'&lt;/span&gt;) &lt;span style=&quot;color: #0000ff;&quot;&gt;else&lt;/span&gt; &lt;span style=&quot;color: #008800;&quot;&gt;&quot;발행일 없음&quot;&lt;/span&gt;
                 
            &lt;span style=&quot;color: #888888;&quot;&gt;# CSV 파일에 데이터 저장&lt;/span&gt;
            csv_writer.writerow([published, title, link])
    
    print(&lt;span style=&quot;color: #008800;&quot;&gt;f&quot;CSV 파일이 성공적으로 저장되었습니다: {output_path}&quot;&lt;/span&gt;)
    print(&lt;span style=&quot;color: #008800;&quot;&gt;f&quot;총 {len(feed.entries)}개 항목이 저장되었습니다.&quot;&lt;/span&gt;)

&lt;span style=&quot;color: #0000ff;&quot;&gt;if&lt;/span&gt; __name__ == &lt;span style=&quot;color: #008800;&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    main()&lt;/code&gt;&lt;/pre&gt;
&lt;/div&gt;
&lt;/details&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;149&quot; data-end=&quot;181&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;149&quot; data-end=&quot;181&quot;&gt;파이썬 코드 실행 결과 예시&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b59g7R/btsN1gA08Pn/aY8LXJTxK1P2F9uNtZTDn0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b59g7R/btsN1gA08Pn/aY8LXJTxK1P2F9uNtZTDn0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b59g7R/btsN1gA08Pn/aY8LXJTxK1P2F9uNtZTDn0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb59g7R%2FbtsN1gA08Pn%2FaY8LXJTxK1P2F9uNtZTDn0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;파일을 열어보면, 모든 정보가 잘 저장되어 있다. &lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1430&quot; data-origin-height=&quot;838&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bxoUqM/btsN1AyZfsy/ygj09Jm5l7dx3GfjMj21sK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bxoUqM/btsN1AyZfsy/ygj09Jm5l7dx3GfjMj21sK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bxoUqM/btsN1AyZfsy/ygj09Jm5l7dx3GfjMj21sK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbxoUqM%2FbtsN1AyZfsy%2Fygj09Jm5l7dx3GfjMj21sK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1430&quot; height=&quot;838&quot; data-origin-width=&quot;1430&quot; data-origin-height=&quot;838&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot; data-start=&quot;149&quot; data-end=&quot;181&quot;&gt;  Power Automate와 연동한 자동 실행&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이제 마지막으로,&amp;nbsp; &lt;b&gt;파이썬 스크립트&lt;/b&gt;를&amp;nbsp; Power Automate와 연동해 자동화를 하면 다음과 같은 결과를 얻을 수 있다. Power Automate는 스크립트를 작성하고, 실행하는 역할을 하고 실제 기능은 파이썬에서 구동된다. 한 가지 아쉬운 점은 Power Automate는 변수 하나에 한 줄을 사용해야 해서 간단한 변수를 설정하는 데도 흐름이 길어진다. &lt;/p&gt;
&lt;h3 data-end=&quot;181&quot; data-start=&quot;149&quot; data-ke-size=&quot;size23&quot;&gt;Power Automate 최종 흐름&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;722&quot; data-origin-height=&quot;669&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/7nEwA/btsN1iFEyqk/7qPIT7jwvc3tLdk5ta1Fyk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/7nEwA/btsN1iFEyqk/7qPIT7jwvc3tLdk5ta1Fyk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/7nEwA/btsN1iFEyqk/7qPIT7jwvc3tLdk5ta1Fyk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F7nEwA%2FbtsN1iFEyqk%2F7qPIT7jwvc3tLdk5ta1Fyk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;722&quot; height=&quot;669&quot; data-origin-width=&quot;722&quot; data-origin-height=&quot;669&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;622&quot; data-origin-height=&quot;316&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/yv2Mz/btsN2QnigQL/BjsrlDcNtWVW12LClo6qeK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/yv2Mz/btsN2QnigQL/BjsrlDcNtWVW12LClo6qeK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/yv2Mz/btsN2QnigQL/BjsrlDcNtWVW12LClo6qeK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fyv2Mz%2FbtsN2QnigQL%2FBjsrlDcNtWVW12LClo6qeK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;622&quot; height=&quot;316&quot; data-origin-width=&quot;622&quot; data-origin-height=&quot;316&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;마치며&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이번 글에서는 feedparser와 urllib.parse를 활용해, 구글 뉴스 RSS에서 기사 제목과 링크, 발행일을 웹에서 직접 추출하여 시간을 단축하는 방법을 살펴보았다. 다음 글에서는 수집한 Google 뉴스 주소를 원문 URL로 변환하는 방법을 소개하고자 한다. &lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>feedparser</category>
      <category>urllib.parse</category>
      <category>웹스크래핑</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/14</guid>
      <comments>https://catalystmind.tistory.com/14#entry14comment</comments>
      <pubDate>Sun, 18 May 2025 10:43:14 +0900</pubDate>
    </item>
    <item>
      <title>Power Automate와 Python을 활용한 주도주 재료 분석 자동화 - 1</title>
      <link>https://catalystmind.tistory.com/13</link>
      <description>&lt;div id=&quot;code_1747276800262&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;Google 뉴스 RSS 자동 처리하기&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        /* 전체 너비 조정을 위한 스타일 */
        .tistory-content-body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
            line-height: 1.6;
            color: #333;
            margin: 0 auto;
            padding: 0;
            background-color: transparent;
            width: 100% !important;
            max-width: 100% !important;
            box-sizing: border-box !important;
        }
        
        .tistory-card {
            background-color: #fff;
            border-radius: 8px;
            padding: 24px;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
            margin-bottom: 24px;
            border-left: 4px solid #3b82f6;
            box-sizing: border-box !important;
            width: 100% !important;
            max-width: 100% !important;
            display: block !important;
        }
        
        /* 제목에 직접 인라인 스타일 추가 */
        .blue-title {
            color: #1e40af !important;
            font-size: 24px !important;
            font-weight: 700 !important;
            margin-top: 0 !important;
            margin-bottom: 16px !important;
            width: 100% !important;
        }
        
        .tistory-key-point {
            background-color: #f0f7ff;
            padding: 12px 16px;
            border-radius: 6px;
            margin: 16px 0;
            box-sizing: border-box !important;
            width: 100% !important;
        }
        
        .tistory-key-point ul {
            padding-left: 20px;
            margin: 0;
            width: 100% !important;
        }
        
        .tistory-key-point li {
            margin-bottom: 10px;
            width: 100% !important;
        }
        
        /* 강조 스타일을 직접 인라인으로 적용 */
        .highlight {
            color: #1e40af !important;
            font-weight: 600 !important;
        }
        
        .tistory-code {
            background-color: #222;
            color: #fff;
            padding: 3px 6px;
            border-radius: 4px;
            display: inline-block;
            white-space: nowrap;
            font-family: Consolas, Monaco, 'Andale Mono', monospace;
        }
        
        .tistory-code .pkg1 {
            color: #e06c75;
        }
        
        .tistory-code .pkg2 {
            color: #61afef;
        }
        
        .tistory-code .pkg3 {
            color: #98c379;
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
&amp;lt;div class=&amp;quot;tistory-content-body&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;tistory-card&amp;quot;&amp;gt;
        &amp;lt;!-- 제목 태그 변경 및 클래스 추가 --&amp;gt;
        &amp;lt;h1 class=&amp;quot;blue-title&amp;quot;&amp;gt;TL;DR&amp;lt;/h1&amp;gt;
        
        &amp;lt;div class=&amp;quot;tistory-key-point&amp;quot;&amp;gt;
            &amp;lt;ul&amp;gt;
                &amp;lt;li&amp;gt;&amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Google RSS XML&amp;lt;/span&amp;gt;, 기본 XML 구조 따르며 컴퓨터 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;폴더 구조와 유사한 계층적 특성&amp;lt;/span&amp;gt;&amp;lt;/li&amp;gt;
                &amp;lt;li&amp;gt;&amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;feedparser&amp;lt;/span&amp;gt;, 폴더 구조 같은 XML 파일을 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;간단한 명령어&amp;lt;/span&amp;gt;로 쉽게 처리 가능&amp;lt;/li&amp;gt;
                &amp;lt;li&amp;gt;최종 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Python 코드&amp;lt;/span&amp;gt;는 XML 파일 처리 후 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;CSV&amp;lt;/span&amp;gt;로 정리&amp;lt;/li&amp;gt;
                &amp;lt;li&amp;gt;해당 코드, &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;단독 실행 가능&amp;lt;/span&amp;gt; 및 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Power Automate와 연동&amp;lt;/span&amp;gt; 통한 자동화 흐름 일부로 활용 가능&amp;lt;/li&amp;gt;
                &amp;lt;li&amp;gt;&amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;Power Automate&amp;lt;/span&amp;gt;만으로 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;5분&amp;lt;/span&amp;gt; 걸리던 작업, &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;feedparser&amp;lt;/span&amp;gt; 활용 시 단 &amp;lt;span class=&amp;quot;highlight&amp;quot;&amp;gt;3초&amp;lt;/span&amp;gt; 만에 처리&amp;lt;/li&amp;gt;
            &amp;lt;/ul&amp;gt;
        &amp;lt;/div&amp;gt;
        
    &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        /* 전체 너비 조정을 위한 스타일 */
        .tistory-content-body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
            line-height: 1.6;
            color: #333;
            margin: 0 auto;
            padding: 0;
            background-color: transparent;
            width: 100% !important;
            max-width: 100% !important;
            box-sizing: border-box !important;
        }
        
        .tistory-card {
            background-color: #fff;
            border-radius: 8px;
            padding: 24px;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
            margin-bottom: 24px;
            border-left: 4px solid #3b82f6;
            box-sizing: border-box !important;
            width: 100% !important;
            max-width: 100% !important;
            display: block !important;
        }
        
        /* 제목에 직접 인라인 스타일 추가 */
        .blue-title {
            color: #1e40af !important;
            font-size: 24px !important;
            font-weight: 700 !important;
            margin-top: 0 !important;
            margin-bottom: 16px !important;
            width: 100% !important;
        }
        
        .tistory-key-point {
            background-color: #f0f7ff;
            padding: 12px 16px;
            border-radius: 6px;
            margin: 16px 0;
            box-sizing: border-box !important;
            width: 100% !important;
        }
        
        .tistory-key-point ul {
            padding-left: 20px;
            margin: 0;
            width: 100% !important;
        }
        
        .tistory-key-point li {
            margin-bottom: 10px;
            width: 100% !important;
        }
        
        /* 강조 스타일을 직접 인라인으로 적용 */
        .highlight {
            color: #1e40af !important;
            font-weight: 600 !important;
        }
        
        .tistory-code {
            background-color: #222;
            color: #fff;
            padding: 3px 6px;
            border-radius: 4px;
            display: inline-block;
            white-space: nowrap;
            font-family: Consolas, Monaco, 'Andale Mono', monospace;
        }
        
        .tistory-code .pkg1 {
            color: #e06c75;
        }
        
        .tistory-code .pkg2 {
            color: #61afef;
        }
        
        .tistory-code .pkg3 {
            color: #98c379;
        }
    &lt;/style&gt;
&lt;div class=&quot;tistory-content-body&quot;&gt;
&lt;div class=&quot;tistory-card&quot;&gt;&lt;!-- 제목 태그 변경 및 클래스 추가 --&gt;
&lt;h1 class=&quot;blue-title&quot;&gt;TL;DR&lt;/h1&gt;
&lt;div class=&quot;tistory-key-point&quot;&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span class=&quot;highlight&quot;&gt;Google RSS XML&lt;/span&gt;, 기본 XML 구조 따르며 컴퓨터 &lt;span class=&quot;highlight&quot;&gt;폴더 구조와 유사한 계층적 특성&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span class=&quot;highlight&quot;&gt;feedparser&lt;/span&gt;, 폴더 구조 같은 XML 파일을 &lt;span class=&quot;highlight&quot;&gt;간단한 명령어&lt;/span&gt;로 쉽게 처리 가능&lt;/li&gt;
&lt;li&gt;최종 &lt;span class=&quot;highlight&quot;&gt;Python 코드&lt;/span&gt;는 XML 파일 처리 후 &lt;span class=&quot;highlight&quot;&gt;CSV&lt;/span&gt;로 정리&lt;/li&gt;
&lt;li&gt;해당 코드, &lt;span class=&quot;highlight&quot;&gt;단독 실행 가능&lt;/span&gt; 및 &lt;span class=&quot;highlight&quot;&gt;Power Automate와 연동&lt;/span&gt; 통한 자동화 흐름 일부로 활용 가능&lt;/li&gt;
&lt;li&gt;&lt;span class=&quot;highlight&quot;&gt;Power Automate&lt;/span&gt;만으로 &lt;span class=&quot;highlight&quot;&gt;5분&lt;/span&gt; 걸리던 작업, &lt;span class=&quot;highlight&quot;&gt;feedparser&lt;/span&gt; 활용 시 단 &lt;span class=&quot;highlight&quot;&gt;3초&lt;/span&gt; 만에 처리&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-end=&quot;513&quot; data-start=&quot;346&quot; data-ke-size=&quot;size16&quot;&gt;앞에서 소개한 Power Automate를 활용한 방식은 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;저장된 xml 파일 10개 처리에만 약 5분가량이 소요&lt;/b&gt;&lt;/span&gt;되었다. 이 단계의 처리 시간 개선을 위해 &lt;span style=&quot;color: #006dd7;&quot;&gt;&lt;b&gt;사용법이 간단하면서도 강력한 기능&lt;/b&gt;&lt;/span&gt;을 제공하는 파이썬 라이브러리 feedparser를 도입하였다.&lt;/p&gt;
&lt;h3 data-end=&quot;513&quot; data-start=&quot;346&quot; data-ke-size=&quot;size23&quot;&gt;XML은 폴더 구조와 같은 계층적 특성을 가진다.&lt;/h3&gt;
&lt;p data-end=&quot;513&quot; data-start=&quot;346&quot; data-ke-size=&quot;size16&quot;&gt;구글 RSS는 엄격한 XML 문법을 따르지는 않지만 기본적인 XML 구조를 따른다. XML은 컴퓨터의 &lt;b&gt;&lt;span style=&quot;color: #006dd7;&quot;&gt;폴더 구조와 매우 유사&lt;/span&gt;&lt;/b&gt;한 다음과 같은 계층적 구조를 가진다. (폴더 아이콘을 클릭하면 펼쳐짐)&lt;/p&gt;
&lt;div id=&quot;code_1747276930569&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div class=&amp;quot;tx-xml-container&amp;quot;&amp;gt;
    &amp;lt;style&amp;gt;
        .tx-xml-container {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #2d3748;
            width: 100%;
            box-sizing: border-box;
        }
        
        .tx-xml-container * {
            box-sizing: border-box;
        }
        
        .tx-xml-title {
            text-align: center;
            margin-bottom: 2rem;
            color: #4a5568;
            font-size: 1.5rem;
            font-weight: bold;
        }
        
        .tx-folder-structure {
            border: 1px solid #a0aec0;
            border-radius: 4px;
            background-color: white;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
            width: 100%;
            margin-bottom: 20px;
        }
        
        .tx-folder {
            border-bottom: 1px solid #a0aec0;
        }
        
        .tx-folder:last-child {
            border-bottom: none;
        }
        
        .tx-folder-header {
            display: flex;
            align-items: center;
            padding: 0.75rem 1rem;
            background-color: #e2e8f0;
            font-weight: 500;
            cursor: pointer;
        }
        
        .tx-folder-header:hover {
            background-color: #cbd5e0;
        }
        
        .tx-folder-header .tx-icon {
            margin-right: 0.5rem;
            font-weight: bold;
        }
        
        .tx-folder-header .tx-tag {
            color: #4a5568;
            font-family: 'Courier New', Courier, monospace;
        }
        
        .tx-folder-content {
            padding-left: 1.5rem;
            border-top: 1px solid #a0aec0;
            background-color: white;
            display: none; /* 기본적으로 접혀있도록 설정 */
        }
        
        .tx-folder.open &amp;gt; .tx-folder-content {
            display: block; /* 열린 상태일 때만 표시 */
        }
        
        .tx-level-0 &amp;gt; .tx-folder-header {
            background-color: #4a5568;
            color: white;
        }
        
        .tx-level-1 &amp;gt; .tx-folder-header {
            background-color: #cbd5e0;
        }
        
        .tx-level-2 &amp;gt; .tx-folder-header {
            background-color: #e2e8f0;
        }
        
        .tx-level-3 &amp;gt; .tx-folder-header {
            background-color: #f7fafc;
        }
        
        .tx-file {
            display: flex;
            align-items: center;
            padding: 0.75rem 1rem;
            border-bottom: 1px solid #a0aec0;
        }
        
        .tx-file:last-child {
            border-bottom: none;
        }
        
        .tx-file .tx-icon {
            margin-right: 0.5rem;
            color: #4a5568;
        }
        
        .tx-file .tx-tag {
            color: #4a5568;
            font-family: 'Courier New', Courier, monospace;
        }
        
        .tx-item-folder {
            background-color: #f0f4ff;
        }
        
        .tx-item-file {
            background-color: #f0f4ff;
        }
    &amp;lt;/style&amp;gt;
    
    &amp;lt;h2 class=&amp;quot;tx-xml-title&amp;quot;&amp;gt;RSS 피드 XML 구조 (폴더형 시각화)&amp;lt;/h2&amp;gt;
    
    &amp;lt;div class=&amp;quot;tx-folder-structure&amp;quot;&amp;gt;
        &amp;lt;!-- Level 0: Root --&amp;gt;
        &amp;lt;div class=&amp;quot;tx-folder tx-level-0&amp;quot;&amp;gt;
            &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;rss&amp;amp;gt;&amp;lt;/span&amp;gt;
                &amp;lt;span&amp;gt; - RSS 루트 요소 (버전, 네임스페이스 정보 포함)&amp;lt;/span&amp;gt;
            &amp;lt;/div&amp;gt;
            
            &amp;lt;div class=&amp;quot;tx-folder-content&amp;quot;&amp;gt;
                &amp;lt;!-- Level 1: channel --&amp;gt;
                &amp;lt;div class=&amp;quot;tx-folder tx-level-1&amp;quot;&amp;gt;
                    &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                        &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                        &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;channel&amp;amp;gt;&amp;lt;/span&amp;gt;
                        &amp;lt;span&amp;gt; - 피드 메타데이터와 뉴스 아이템 컨테이너&amp;lt;/span&amp;gt;
                    &amp;lt;/div&amp;gt;
                    
                    &amp;lt;div class=&amp;quot;tx-folder-content&amp;quot;&amp;gt;
                        &amp;lt;!-- Level 2: Meta elements --&amp;gt;
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;title&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 피드 제목 (&amp;quot;KTis after:2025-05-01 before:2025-05-03&amp;quot; - Google 뉴스)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;link&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 피드 링크 (https://news.google.com/search?...)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;language&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 언어 (ko)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;webMaster&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 웹마스터 정보 (news-webmaster@google.com)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;copyright&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 저작권 정보&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;lastBuildDate&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 마지막 업데이트 날짜 (Sat, 03 May 2025 11:23:03 GMT)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-folder tx-level-2&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;image&amp;amp;gt;&amp;lt;/span&amp;gt;
                                &amp;lt;span&amp;gt; - 피드 이미지 정보&amp;lt;/span&amp;gt;
                            &amp;lt;/div&amp;gt;
                            
                            &amp;lt;div class=&amp;quot;tx-folder-content&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;title&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 이미지 제목&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;url&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 이미지 URL&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;link&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 이미지 링크&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;height&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 이미지 높이&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;width&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 이미지 너비&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-file&amp;quot;&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                            &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;description&amp;amp;gt;&amp;lt;/span&amp;gt;
                            &amp;lt;span&amp;gt; - 피드 설명 (Google 뉴스)&amp;lt;/span&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;!-- Level 2: Items --&amp;gt;
                        &amp;lt;div class=&amp;quot;tx-folder tx-level-2 tx-item-folder&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;item&amp;amp;gt;&amp;lt;/span&amp;gt;
                                &amp;lt;span&amp;gt; - 첫 번째 뉴스 기사&amp;lt;/span&amp;gt;
                            &amp;lt;/div&amp;gt;
                            
                            &amp;lt;div class=&amp;quot;tx-folder-content&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;title&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 뉴스 제목 ([특징주] KTis, SKT 사태 해결까지 신규 모집중단&amp;hellip;)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;link&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 뉴스 링크&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;guid&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 고유 식별자 (각 뉴스 항목의 영구적인 고유 ID로 중복 방지와 추적에 사용)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;pubDate&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 발행일 (Fri, 02 May 2025 00:45:14 GMT)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;description&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 설명 (HTML 포함)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;source&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 출처 (머니S)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-folder tx-level-2 tx-item-folder&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;item&amp;amp;gt;&amp;lt;/span&amp;gt;
                                &amp;lt;span&amp;gt; - 두 번째 뉴스 기사&amp;lt;/span&amp;gt;
                            &amp;lt;/div&amp;gt;
                            
                            &amp;lt;div class=&amp;quot;tx-folder-content&amp;quot;&amp;gt;
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;title&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 뉴스 제목 (KTis, +18.98% 52주 신고가)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;link&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 뉴스 링크&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;guid&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 고유 식별자 (각 뉴스 항목의 영구적인 고유 ID로 중복 방지와 추적에 사용)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;pubDate&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 발행일 (Fri, 02 May 2025 04:17:00 GMT)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;description&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 설명 (HTML 포함)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                                
                                &amp;lt;div class=&amp;quot;tx-file tx-item-file&amp;quot;&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                    &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;source&amp;amp;gt;&amp;lt;/span&amp;gt;
                                    &amp;lt;span&amp;gt; - 출처 (Chosunbiz)&amp;lt;/span&amp;gt;
                                &amp;lt;/div&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                        
                        &amp;lt;div class=&amp;quot;tx-folder tx-level-2 tx-item-folder&amp;quot;&amp;gt;
                            &amp;lt;div class=&amp;quot;tx-folder-header&amp;quot;&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-icon&amp;quot;&amp;gt; &amp;lt;/span&amp;gt;
                                &amp;lt;span class=&amp;quot;tx-tag&amp;quot;&amp;gt;&amp;amp;lt;item&amp;amp;gt;&amp;lt;/span&amp;gt;
                                &amp;lt;span&amp;gt; - 추가 뉴스 기사들...&amp;lt;/span&amp;gt;
                            &amp;lt;/div&amp;gt;
                        &amp;lt;/div&amp;gt;
                    &amp;lt;/div&amp;gt;
                &amp;lt;/div&amp;gt;
            &amp;lt;/div&amp;gt;
        &amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt;

    &amp;lt;script&amp;gt;
        // 페이지 로드 후 폴더 토글 기능 추가
        document.addEventListener('DOMContentLoaded', function() {
            // 기본적으로 최상위 폴더만 열어둠
            document.querySelectorAll('.tx-folder.tx-level-0').forEach(folder =&amp;gt; {
                folder.classList.add('open');
            });
            
            // 모든 폴더 헤더에 클릭 이벤트 추가
            document.querySelectorAll('.tx-folder-header').forEach(header =&amp;gt; {
                header.addEventListener('click', function(event) {
                    const folder = this.parentElement;
                    folder.classList.toggle('open');
                    
                    // 아이콘 변경
                    const icon = this.querySelector('.tx-icon');
                    if (folder.classList.contains('open')) {
                        icon.textContent = '📂'; // 열린 폴더 아이콘
                    } else {
                        icon.textContent = '📁'; // 닫힌 폴더 아이콘
                    }
                    
                    // 이벤트 버블링 방지
                    event.stopPropagation();
                });
            });
        });
    &amp;lt;/script&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div class=&quot;tx-xml-container&quot;&gt;
&lt;style&gt;
        .tx-xml-container {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #2d3748;
            width: 100%;
            box-sizing: border-box;
        }
        
        .tx-xml-container * {
            box-sizing: border-box;
        }
        
        .tx-xml-title {
            text-align: center;
            margin-bottom: 2rem;
            color: #4a5568;
            font-size: 1.5rem;
            font-weight: bold;
        }
        
        .tx-folder-structure {
            border: 1px solid #a0aec0;
            border-radius: 4px;
            background-color: white;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
            width: 100%;
            margin-bottom: 20px;
        }
        
        .tx-folder {
            border-bottom: 1px solid #a0aec0;
        }
        
        .tx-folder:last-child {
            border-bottom: none;
        }
        
        .tx-folder-header {
            display: flex;
            align-items: center;
            padding: 0.75rem 1rem;
            background-color: #e2e8f0;
            font-weight: 500;
            cursor: pointer;
        }
        
        .tx-folder-header:hover {
            background-color: #cbd5e0;
        }
        
        .tx-folder-header .tx-icon {
            margin-right: 0.5rem;
            font-weight: bold;
        }
        
        .tx-folder-header .tx-tag {
            color: #4a5568;
            font-family: 'Courier New', Courier, monospace;
        }
        
        .tx-folder-content {
            padding-left: 1.5rem;
            border-top: 1px solid #a0aec0;
            background-color: white;
            display: none; /* 기본적으로 접혀있도록 설정 */
        }
        
        .tx-folder.open &gt; .tx-folder-content {
            display: block; /* 열린 상태일 때만 표시 */
        }
        
        .tx-level-0 &gt; .tx-folder-header {
            background-color: #4a5568;
            color: white;
        }
        
        .tx-level-1 &gt; .tx-folder-header {
            background-color: #cbd5e0;
        }
        
        .tx-level-2 &gt; .tx-folder-header {
            background-color: #e2e8f0;
        }
        
        .tx-level-3 &gt; .tx-folder-header {
            background-color: #f7fafc;
        }
        
        .tx-file {
            display: flex;
            align-items: center;
            padding: 0.75rem 1rem;
            border-bottom: 1px solid #a0aec0;
        }
        
        .tx-file:last-child {
            border-bottom: none;
        }
        
        .tx-file .tx-icon {
            margin-right: 0.5rem;
            color: #4a5568;
        }
        
        .tx-file .tx-tag {
            color: #4a5568;
            font-family: 'Courier New', Courier, monospace;
        }
        
        .tx-item-folder {
            background-color: #f0f4ff;
        }
        
        .tx-item-file {
            background-color: #f0f4ff;
        }
    &lt;/style&gt;
&lt;h2 class=&quot;tx-xml-title&quot; data-ke-size=&quot;size26&quot;&gt;RSS 피드 XML 구조 (폴더형 시각화)&lt;/h2&gt;
&lt;div class=&quot;tx-folder-structure&quot;&gt;&lt;!-- Level 0: Root --&gt;
&lt;div class=&quot;tx-folder tx-level-0&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;rss&amp;gt;&lt;/span&gt; &lt;span&gt; - RSS 루트 요소 (버전, 네임스페이스 정보 포함)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder-content&quot;&gt;&lt;!-- Level 1: channel --&gt;
&lt;div class=&quot;tx-folder tx-level-1&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;channel&amp;gt;&lt;/span&gt; &lt;span&gt; - 피드 메타데이터와 뉴스 아이템 컨테이너&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder-content&quot;&gt;&lt;!-- Level 2: Meta elements --&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;title&amp;gt;&lt;/span&gt; &lt;span&gt; - 피드 제목 (&quot;KTis after:2025-05-01 before:2025-05-03&quot; - Google 뉴스)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;link&amp;gt;&lt;/span&gt; &lt;span&gt; - 피드 링크 (https://news.google.com/search?...)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;language&amp;gt;&lt;/span&gt; &lt;span&gt; - 언어 (ko)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;webMaster&amp;gt;&lt;/span&gt; &lt;span&gt; - 웹마스터 정보 (news-webmaster@google.com)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;copyright&amp;gt;&lt;/span&gt; &lt;span&gt; - 저작권 정보&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;lastBuildDate&amp;gt;&lt;/span&gt; &lt;span&gt; - 마지막 업데이트 날짜 (Sat, 03 May 2025 11:23:03 GMT)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder tx-level-2&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;image&amp;gt;&lt;/span&gt; &lt;span&gt; - 피드 이미지 정보&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder-content&quot;&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;title&amp;gt;&lt;/span&gt; &lt;span&gt; - 이미지 제목&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;url&amp;gt;&lt;/span&gt; &lt;span&gt; - 이미지 URL&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;link&amp;gt;&lt;/span&gt; &lt;span&gt; - 이미지 링크&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;height&amp;gt;&lt;/span&gt; &lt;span&gt; - 이미지 높이&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;width&amp;gt;&lt;/span&gt; &lt;span&gt; - 이미지 너비&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;description&amp;gt;&lt;/span&gt; &lt;span&gt; - 피드 설명 (Google 뉴스)&lt;/span&gt;&lt;/div&gt;
&lt;!-- Level 2: Items --&gt;
&lt;div class=&quot;tx-folder tx-level-2 tx-item-folder&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;item&amp;gt;&lt;/span&gt; &lt;span&gt; - 첫 번째 뉴스 기사&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder-content&quot;&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;title&amp;gt;&lt;/span&gt; &lt;span&gt; - 뉴스 제목 ([특징주] KTis, SKT 사태 해결까지 신규 모집중단&amp;hellip;)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;link&amp;gt;&lt;/span&gt; &lt;span&gt; - 뉴스 링크&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;guid&amp;gt;&lt;/span&gt; &lt;span&gt; - 고유 식별자 (각 뉴스 항목의 영구적인 고유 ID로 중복 방지와 추적에 사용)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;pubDate&amp;gt;&lt;/span&gt; &lt;span&gt; - 발행일 (Fri, 02 May 2025 00:45:14 GMT)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;description&amp;gt;&lt;/span&gt; &lt;span&gt; - 설명 (HTML 포함)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;source&amp;gt;&lt;/span&gt; &lt;span&gt; - 출처 (머니S)&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-folder tx-level-2 tx-item-folder&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;item&amp;gt;&lt;/span&gt; &lt;span&gt; - 두 번째 뉴스 기사&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-folder-content&quot;&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;title&amp;gt;&lt;/span&gt; &lt;span&gt; - 뉴스 제목 (KTis, +18.98% 52주 신고가)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;link&amp;gt;&lt;/span&gt; &lt;span&gt; - 뉴스 링크&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;guid&amp;gt;&lt;/span&gt; &lt;span&gt; - 고유 식별자 (각 뉴스 항목의 영구적인 고유 ID로 중복 방지와 추적에 사용)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;pubDate&amp;gt;&lt;/span&gt; &lt;span&gt; - 발행일 (Fri, 02 May 2025 04:17:00 GMT)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;description&amp;gt;&lt;/span&gt; &lt;span&gt; - 설명 (HTML 포함)&lt;/span&gt;&lt;/div&gt;
&lt;div class=&quot;tx-file tx-item-file&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;source&amp;gt;&lt;/span&gt; &lt;span&gt; - 출처 (Chosunbiz)&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-folder tx-level-2 tx-item-folder&quot;&gt;
&lt;div class=&quot;tx-folder-header&quot;&gt;&lt;span class=&quot;tx-icon&quot;&gt; &lt;/span&gt; &lt;span class=&quot;tx-tag&quot;&gt;&amp;lt;item&amp;gt;&lt;/span&gt; &lt;span&gt; - 추가 뉴스 기사들...&lt;/span&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;script&gt;
        // 페이지 로드 후 폴더 토글 기능 추가
        document.addEventListener('DOMContentLoaded', function() {
            // 기본적으로 최상위 폴더만 열어둠
            document.querySelectorAll('.tx-folder.tx-level-0').forEach(folder =&gt; {
                folder.classList.add('open');
            });
            
            // 모든 폴더 헤더에 클릭 이벤트 추가
            document.querySelectorAll('.tx-folder-header').forEach(header =&gt; {
                header.addEventListener('click', function(event) {
                    const folder = this.parentElement;
                    folder.classList.toggle('open');
                    
                    // 아이콘 변경
                    const icon = this.querySelector('.tx-icon');
                    if (folder.classList.contains('open')) {
                        icon.textContent = '📂'; // 열린 폴더 아이콘
                    } else {
                        icon.textContent = '📁'; // 닫힌 폴더 아이콘
                    }
                    
                    // 이벤트 버블링 방지
                    event.stopPropagation();
                });
            });
        });
    &lt;/script&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;h3 data-ke-size=&quot;size23&quot; data-start=&quot;346&quot; data-end=&quot;513&quot;&gt;Feedparser:&amp;nbsp; 폴더처럼 생긴 XML을 쉽게 다루는 도구&lt;/h3&gt;
&lt;div id=&quot;code_1747233146769&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div class=&amp;quot;tx-feedparser-container&amp;quot;&amp;gt;
  &amp;lt;style&amp;gt;
    .tx-feedparser-container {
      width: 100%;
      box-sizing: border-box;
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      margin-bottom: 20px;
    }
    
    .tx-feedparser-container * {
      box-sizing: border-box;
    }
    
    .tx-feedparser-title {
      color: #4a5568;
      border-bottom: 2px solid #4a5568;
      padding-bottom: 10px;
      margin-bottom: 20px;
      font-size: 1.5rem;
      font-weight: bold;
    }
    
    .tx-feedparser-table {
      width: 100%;
      border-collapse: collapse;
      box-shadow: 0 2px 4px rgba(0,0,0,0.05);
      border: 1px solid #a0aec0;
      border-radius: 4px;
      overflow: hidden;
      margin-bottom: 20px;
    }
    
    .tx-feedparser-thead {
      background-color: #4a5568;
    }
    
    .tx-feedparser-th {
      padding: 12px 15px;
      text-align: left;
      border-bottom: 1px solid #a0aec0;
      font-weight: 500;
      color: white !important;
    }
    
    .tx-feedparser-feature {
      width: 30%;
    }
    
    .tx-feedparser-desc {
      width: 70%;
    }
    
    .tx-feedparser-tr-alt {
      background-color: #f7fafc;
    }
    
    .tx-feedparser-tr {
      background-color: white;
    }
    
    .tx-feedparser-td {
      padding: 12px 15px;
      border-bottom: 1px solid #e2e8f0;
    }
    
    .tx-feedparser-td-feature {
      font-weight: 500;
    }
    
    .tx-feedparser-tr:last-child .tx-feedparser-td,
    .tx-feedparser-tr-alt:last-child .tx-feedparser-td {
      border-bottom: none;
    }
  &amp;lt;/style&amp;gt;
  
  &amp;lt;h2 class=&amp;quot;tx-feedparser-title&amp;quot;&amp;gt;feedparser의 주요 기능&amp;lt;/h2&amp;gt;
  
  &amp;lt;table class=&amp;quot;tx-feedparser-table&amp;quot;&amp;gt;
    &amp;lt;thead class=&amp;quot;tx-feedparser-thead&amp;quot;&amp;gt;
      &amp;lt;tr&amp;gt;
        &amp;lt;th class=&amp;quot;tx-feedparser-th tx-feedparser-feature&amp;quot;&amp;gt;기능&amp;lt;/th&amp;gt;
        &amp;lt;th class=&amp;quot;tx-feedparser-th tx-feedparser-desc&amp;quot;&amp;gt;설명&amp;lt;/th&amp;gt;
      &amp;lt;/tr&amp;gt;
    &amp;lt;/thead&amp;gt;
    &amp;lt;tbody&amp;gt;
      &amp;lt;tr class=&amp;quot;tx-feedparser-tr-alt&amp;quot;&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td tx-feedparser-td-feature&amp;quot;&amp;gt;다양한 피드 형식 지원&amp;lt;/td&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td&amp;quot;&amp;gt;대부분의 RSS 2.0, Atom 1.0 등 표준 피드를 지원&amp;lt;/td&amp;gt;
      &amp;lt;/tr&amp;gt;
      &amp;lt;tr class=&amp;quot;tx-feedparser-tr&amp;quot;&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td tx-feedparser-td-feature&amp;quot;&amp;gt;간편한 사용법&amp;lt;/td&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td&amp;quot;&amp;gt;복잡한 XML을 Python의 딕셔너리 및 리스트 형태로 자동 변환&amp;lt;/td&amp;gt;
      &amp;lt;/tr&amp;gt;
      &amp;lt;tr class=&amp;quot;tx-feedparser-tr-alt&amp;quot;&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td tx-feedparser-td-feature&amp;quot;&amp;gt;자동 인코딩 감지&amp;lt;/td&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td&amp;quot;&amp;gt;UTF-8, ISO-8859 등 다양한 문자 인코딩을 자동 인식&amp;lt;/td&amp;gt;
      &amp;lt;/tr&amp;gt;
      &amp;lt;tr class=&amp;quot;tx-feedparser-tr&amp;quot;&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td tx-feedparser-td-feature&amp;quot;&amp;gt;날짜 형식 자동 처리&amp;lt;/td&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td&amp;quot;&amp;gt;다양한 피드의 날짜 표현을 Python datetime 형식으로 변환&amp;lt;/td&amp;gt;
      &amp;lt;/tr&amp;gt;
      &amp;lt;tr class=&amp;quot;tx-feedparser-tr-alt&amp;quot;&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td tx-feedparser-td-feature&amp;quot;&amp;gt;순수 Python 구현&amp;lt;/td&amp;gt;
        &amp;lt;td class=&amp;quot;tx-feedparser-td&amp;quot;&amp;gt;별도의 의존성 없이 설치와 실행이 간단&amp;lt;/td&amp;gt;
      &amp;lt;/tr&amp;gt;
    &amp;lt;/tbody&amp;gt;
  &amp;lt;/table&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div class=&quot;tx-feedparser-container&quot;&gt;
&lt;style&gt;
    .tx-feedparser-container {
      width: 100%;
      box-sizing: border-box;
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      margin-bottom: 20px;
    }
    
    .tx-feedparser-container * {
      box-sizing: border-box;
    }
    
    .tx-feedparser-title {
      color: #4a5568;
      border-bottom: 2px solid #4a5568;
      padding-bottom: 10px;
      margin-bottom: 20px;
      font-size: 1.5rem;
      font-weight: bold;
    }
    
    .tx-feedparser-table {
      width: 100%;
      border-collapse: collapse;
      box-shadow: 0 2px 4px rgba(0,0,0,0.05);
      border: 1px solid #a0aec0;
      border-radius: 4px;
      overflow: hidden;
      margin-bottom: 20px;
    }
    
    .tx-feedparser-thead {
      background-color: #4a5568;
    }
    
    .tx-feedparser-th {
      padding: 12px 15px;
      text-align: left;
      border-bottom: 1px solid #a0aec0;
      font-weight: 500;
      color: white !important;
    }
    
    .tx-feedparser-feature {
      width: 30%;
    }
    
    .tx-feedparser-desc {
      width: 70%;
    }
    
    .tx-feedparser-tr-alt {
      background-color: #f7fafc;
    }
    
    .tx-feedparser-tr {
      background-color: white;
    }
    
    .tx-feedparser-td {
      padding: 12px 15px;
      border-bottom: 1px solid #e2e8f0;
    }
    
    .tx-feedparser-td-feature {
      font-weight: 500;
    }
    
    .tx-feedparser-tr:last-child .tx-feedparser-td,
    .tx-feedparser-tr-alt:last-child .tx-feedparser-td {
      border-bottom: none;
    }
  &lt;/style&gt;
&lt;h2 class=&quot;tx-feedparser-title&quot; data-ke-size=&quot;size26&quot;&gt;feedparser의 주요 기능&lt;/h2&gt;
&lt;table class=&quot;tx-feedparser-table&quot;&gt;
&lt;thead class=&quot;tx-feedparser-thead&quot;&gt;
&lt;tr&gt;
&lt;th class=&quot;tx-feedparser-th tx-feedparser-feature&quot;&gt;기능&lt;/th&gt;
&lt;th class=&quot;tx-feedparser-th tx-feedparser-desc&quot;&gt;설명&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr class=&quot;tx-feedparser-tr-alt&quot;&gt;
&lt;td class=&quot;tx-feedparser-td tx-feedparser-td-feature&quot;&gt;다양한 피드 형식 지원&lt;/td&gt;
&lt;td class=&quot;tx-feedparser-td&quot;&gt;대부분의 RSS 2.0, Atom 1.0 등 표준 피드를 지원&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;tx-feedparser-tr&quot;&gt;
&lt;td class=&quot;tx-feedparser-td tx-feedparser-td-feature&quot;&gt;간편한 사용법&lt;/td&gt;
&lt;td class=&quot;tx-feedparser-td&quot;&gt;복잡한 XML을 Python의 딕셔너리 및 리스트 형태로 자동 변환&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;tx-feedparser-tr-alt&quot;&gt;
&lt;td class=&quot;tx-feedparser-td tx-feedparser-td-feature&quot;&gt;자동 인코딩 감지&lt;/td&gt;
&lt;td class=&quot;tx-feedparser-td&quot;&gt;UTF-8, ISO-8859 등 다양한 문자 인코딩을 자동 인식&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;tx-feedparser-tr&quot;&gt;
&lt;td class=&quot;tx-feedparser-td tx-feedparser-td-feature&quot;&gt;날짜 형식 자동 처리&lt;/td&gt;
&lt;td class=&quot;tx-feedparser-td&quot;&gt;다양한 피드의 날짜 표현을 Python datetime 형식으로 변환&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;tx-feedparser-tr-alt&quot;&gt;
&lt;td class=&quot;tx-feedparser-td tx-feedparser-td-feature&quot;&gt;순수 Python 구현&lt;/td&gt;
&lt;td class=&quot;tx-feedparser-td&quot;&gt;별도의 의존성 없이 설치와 실행이 간단&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div id=&quot;code_1747228258944&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div class=&amp;quot;tx-code-example&amp;quot;&amp;gt;
  &amp;lt;style&amp;gt;
    .tx-code-example {
      width: 100%;
      box-sizing: border-box;
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
    }
    
    .tx-code-example * {
      box-sizing: border-box;
    }
    
    .tx-code-container {
      border-left: 4px solid #5D7FEB;
      padding-left: 15px;
      margin: 20px 0;
    }
    
    .tx-code-title {
      color: #333;
      font-size: 20px;
      margin-bottom: 15px;
      font-weight: bold;
    }
    
    .tx-code-description {
      color: #444;
      margin-bottom: 15px;
      line-height: 1.5;
    }
    
    .tx-code-block {
      background-color: #f0f4f8;
      border-radius: 5px;
      padding: 15px;
      overflow: auto;
      max-width: 100%;
    }
    
    .tx-code-pre {
      margin: 0;
      font-family: Consolas, Monaco, 'Courier New', monospace;
      font-size: 14px;
      line-height: 1.5;
      white-space: pre;
      overflow-x: auto;
    }
    
    .tx-code-keyword {
      color: #0000CD;
    }
    
    .tx-code-comment {
      color: #008000;
    }
    
    .tx-code-string {
      color: #A31515;
    }
    
    .tx-code-function {
      color: #795E26;
    }
  &amp;lt;/style&amp;gt;
  
  &amp;lt;div class=&amp;quot;tx-code-container&amp;quot;&amp;gt;
    &amp;lt;h2 class=&amp;quot;tx-code-title&amp;quot;&amp;gt;feedparser 사용 예시&amp;lt;/h2&amp;gt;
    &amp;lt;p class=&amp;quot;tx-code-description&amp;quot;&amp;gt;Python feedparser 라이브러리를 사용하여 이 XML 구조의 데이터를 추출하는 기본 코드 예시:&amp;lt;/p&amp;gt;
    
    &amp;lt;div class=&amp;quot;tx-code-block&amp;quot;&amp;gt;
      &amp;lt;pre class=&amp;quot;tx-code-pre&amp;quot;&amp;gt;&amp;lt;span class=&amp;quot;tx-code-keyword&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; feedparser
&amp;lt;span class=&amp;quot;tx-code-comment&amp;quot;&amp;gt;# XML 파일 파싱&amp;lt;/span&amp;gt;
feed = feedparser.parse(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;&amp;quot;2025-05-02_KTis.xml&amp;quot;&amp;lt;/span&amp;gt;)
&amp;lt;span class=&amp;quot;tx-code-comment&amp;quot;&amp;gt;# 피드 메타데이터 접근&amp;lt;/span&amp;gt;
feed_title = feed.feed.title
feed_link = feed.feed.link
feed_language = feed.feed.language
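last_build_date = feed.feed.updated  &amp;lt;span class=&amp;quot;tx-code-comment&amp;quot;&amp;gt;# RSS의 lastBuildDate는 feedparser에서 updated로 노출됨&amp;lt;/span&amp;gt;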
&amp;lt;span class=&amp;quot;tx-code-comment&amp;quot;&amp;gt;# 뉴스 기사 접근&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;tx-code-keyword&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; entry &amp;lt;span class=&amp;quot;tx-code-keyword&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; feed.entries:
    title = entry.title
    link = entry.link
    published = entry.published
    source = entry.source.title &amp;lt;span class=&amp;quot;tx-code-keyword&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; hasattr(entry, &amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;'source'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;tx-code-keyword&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;''&amp;lt;/span&amp;gt;
    
    &amp;lt;span class=&amp;quot;tx-code-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;f&amp;quot;제목: {title}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;tx-code-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;f&amp;quot;출처: {source}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;tx-code-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;f&amp;quot;발행일: {published}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;tx-code-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;f&amp;quot;링크: {link}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;tx-code-function&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;tx-code-string&amp;quot;&amp;gt;&amp;quot;-------------------&amp;quot;&amp;lt;/span&amp;gt;)&amp;lt;/pre&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div class=&quot;tx-code-example&quot;&gt;
&lt;style&gt;
    .tx-code-example {
      width: 100%;
      box-sizing: border-box;
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
    }
    
    .tx-code-example * {
      box-sizing: border-box;
    }
    
    .tx-code-container {
      border-left: 4px solid #5D7FEB;
      padding-left: 15px;
      margin: 20px 0;
    }
    
    .tx-code-title {
      color: #333;
      font-size: 20px;
      margin-bottom: 15px;
      font-weight: bold;
    }
    
    .tx-code-description {
      color: #444;
      margin-bottom: 15px;
      line-height: 1.5;
    }
    
    .tx-code-block {
      background-color: #f0f4f8;
      border-radius: 5px;
      padding: 15px;
      overflow: auto;
      max-width: 100%;
    }
    
    .tx-code-pre {
      margin: 0;
      font-family: Consolas, Monaco, 'Courier New', monospace;
      font-size: 14px;
      line-height: 1.5;
      white-space: pre;
      overflow-x: auto;
    }
    
    .tx-code-keyword {
      color: #0000CD;
    }
    
    .tx-code-comment {
      color: #008000;
    }
    
    .tx-code-string {
      color: #A31515;
    }
    
    .tx-code-function {
      color: #795E26;
    }
  &lt;/style&gt;
&lt;div class=&quot;tx-code-container&quot;&gt;
&lt;h2 class=&quot;tx-code-title&quot; data-ke-size=&quot;size26&quot;&gt;feedparser 사용 예시&lt;/h2&gt;
&lt;p class=&quot;tx-code-description&quot; data-ke-size=&quot;size16&quot;&gt;Python feedparser 라이브러리를 사용하여 이 XML 구조의 데이터를 추출하는 기본 코드 예시:&lt;/p&gt;
&lt;div class=&quot;tx-code-block&quot;&gt;
&lt;pre class=&quot;tx-code-pre&quot;&gt;&lt;span class=&quot;tx-code-keyword&quot;&gt;import&lt;/span&gt; feedparser
&lt;span class=&quot;tx-code-comment&quot;&gt;# XML 파일 파싱&lt;/span&gt;
feed = feedparser.parse(&lt;span class=&quot;tx-code-string&quot;&gt;&quot;2025-05-02_KTis.xml&quot;&lt;/span&gt;)
&lt;span class=&quot;tx-code-comment&quot;&gt;# 피드 메타데이터 접근&lt;/span&gt;
feed_title = feed.feed.title
feed_link = feed.feed.link
feed_language = feed.feed.language
last_build_date = feed.feed.updated
&lt;span class=&quot;tx-code-comment&quot;&gt;# 뉴스 기사 접근&lt;/span&gt;
&lt;span class=&quot;tx-code-keyword&quot;&gt;for&lt;/span&gt; entry &lt;span class=&quot;tx-code-keyword&quot;&gt;in&lt;/span&gt; feed.entries:
    title = entry.title
    link = entry.link
    published = entry.published
    source = entry.source.title &lt;span class=&quot;tx-code-keyword&quot;&gt;if&lt;/span&gt; hasattr(entry, &lt;span class=&quot;tx-code-string&quot;&gt;'source'&lt;/span&gt;) &lt;span class=&quot;tx-code-keyword&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;tx-code-string&quot;&gt;''&lt;/span&gt;
    
    &lt;span class=&quot;tx-code-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;tx-code-string&quot;&gt;f&quot;제목: {title}&quot;&lt;/span&gt;)
    &lt;span class=&quot;tx-code-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;tx-code-string&quot;&gt;f&quot;출처: {source}&quot;&lt;/span&gt;)
    &lt;span class=&quot;tx-code-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;tx-code-string&quot;&gt;f&quot;발행일: {published}&quot;&lt;/span&gt;)
    &lt;span class=&quot;tx-code-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;tx-code-string&quot;&gt;f&quot;링크: {link}&quot;&lt;/span&gt;)
    &lt;span class=&quot;tx-code-function&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;tx-code-string&quot;&gt;&quot;-------------------&quot;&lt;/span&gt;)&lt;/pre&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-end=&quot;133&quot; data-start=&quot;115&quot; data-ke-size=&quot;size23&quot;&gt;최종 파이썬 코드 구성&lt;/h3&gt;
&lt;div id=&quot;code_1747277429797&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;div class=&amp;quot;tx-flow-container&amp;quot;&amp;gt; 
&amp;lt;div class=&amp;quot;tx-flow-container&amp;quot;&amp;gt; 
&amp;lt;style&amp;gt; 
/* 티스토리 스타일 충돌 방지를 위한 리셋 */
.tx-flow-container,
.tx-flow-container * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

.tx-flow-container { 
    width: 100% !important; 
    max-width: 100% !important; /* 최대 넓이를 100%로 설정 */
    margin: 0 auto 20px;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; 
    position: relative;
    overflow: visible !important; /* 오버플로우 제거 */
} 

.tx-flow-step { 
    display: flex; 
    margin-bottom: 30px; 
    position: relative; 
    width: 100%;
} 

.tx-flow-left { 
    position: relative; 
    width: 30px; 
    min-width: 30px; /* 최소 너비 보장 */
    flex-shrink: 0; 
    margin-right: 15px; 
} 

/* 중앙 연결선 스타일 */
.tx-flow-main-line {
    position: absolute;
    left: 14px;
    top: 0;
    width: 2px;
    height: 100%;
    background-color: #5D7FEB;
    z-index: 1;
}

.tx-flow-number { 
    background-color: #5D7FEB !important; /* 티스토리 스타일 오버라이드 */
    color: white !important; 
    width: 30px !important; 
    height: 30px !important; 
    border-radius: 4px !important; 
    display: flex !important; 
    align-items: center !important; 
    justify-content: center !important; 
    font-weight: bold !important; 
    position: absolute !important; 
    top: 50% !important; 
    transform: translateY(-50%) !important; 
    z-index: 2 !important;
    font-size: 14px !important;
    line-height: 1 !important;
    text-align: center !important;
} 

.tx-flow-content { 
    flex-grow: 1; 
    min-width: 0; /* 너비 오버플로우 방지 */
    width: calc(100% - 45px) !important; /* 왼쪽 영역을 제외한 너비 계산 */
} 

.tx-flow-box { 
    background-color: #f0f4f8 !important; 
    border-left: 4px solid #5D7FEB !important; 
    border-radius: 5px !important; 
    padding: 15px !important; 
    color: #444 !important; 
    line-height: 1.5 !important;
    word-wrap: break-word !important; /* 긴 텍스트 줄바꿈 */
    width: 100% !important;
    max-width: 100% !important;
} 

/* 모바일 화면 대응 */
@media (max-width: 640px) {
    .tx-flow-container {
        padding: 0 5px !important;
        width: calc(100% - 10px) !important;
    }
    
    .tx-flow-box {
        padding: 10px !important;
    }
    
    .tx-flow-left {
        margin-right: 10px;
    }
}

/* 텍스트 박스 컨테이너 스타일 오버라이드 */
.txc-textbox .tx-flow-container {
    width: 100% !important;
    max-width: 100% !important;
    overflow: visible !important;
}

.entry-content .tx-flow-container {
    width: 100% !important;
    max-width: 100% !important;
    overflow: visible !important;
}
&amp;lt;/style&amp;gt; 

&amp;lt;!-- 중앙 연결선 추가 --&amp;gt;
&amp;lt;div style=&amp;quot;position: absolute; left: 14px; top: 15px; width: 2px; height: calc(100% - 30px); background-color: #5D7FEB; z-index: 1;&amp;quot;&amp;gt;&amp;lt;/div&amp;gt;

&amp;lt;div class=&amp;quot;tx-flow-step&amp;quot;&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-left&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-number&amp;quot;&amp;gt;1&amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-content&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-box&amp;quot;&amp;gt; 
            &amp;lt;strong&amp;gt;입력 폴더 처리:&amp;lt;/strong&amp;gt; 지정된 폴더 내에 있는 모든 *.xml 파일들을 순회하면서 하나씩 읽고 처리. 명령줄 인자로 경로를 입력받아, Powerautomate 자동화 흐름에 쉽게 통합 가능. 
        &amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt; 
&amp;lt;div class=&amp;quot;tx-flow-step&amp;quot;&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-left&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-number&amp;quot;&amp;gt;2&amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-content&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-box&amp;quot;&amp;gt; 
            &amp;lt;strong&amp;gt;Feedparser를 활용한 XML 구조 해석:&amp;lt;/strong&amp;gt; XML 파일을 feedparser로 읽고, 내부에 있는 각 뉴스 항목의 제목, 링크, 발행일 등을 추출. 복잡한 XML 트리를 Powerautomate의 정규식을 이용해서 직접 다룰 필요 없음. 
        &amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt; 
&amp;lt;div class=&amp;quot;tx-flow-step&amp;quot;&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-left&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-number&amp;quot;&amp;gt;3&amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-content&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-box&amp;quot;&amp;gt; 
            &amp;lt;strong&amp;gt;시간 정보 처리 (UTC &amp;rarr; KST 변환):&amp;lt;/strong&amp;gt; RSS 피드에 UTC 기준으로 기록된 발행 시간을 &amp;lt;strong&amp;gt;한국 표준시(KST, UTC+9)&amp;lt;/strong&amp;gt;로 변환. 
        &amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt; 
&amp;lt;div class=&amp;quot;tx-flow-step&amp;quot;&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-left&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-number&amp;quot;&amp;gt;4&amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-content&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-box&amp;quot;&amp;gt; 
            &amp;lt;strong&amp;gt;CSV 저장:&amp;lt;/strong&amp;gt; 각 XML 파일에 대해 [파일명]_google_url.csv 형태로 결과를 저장. 다음 작업 연계를 위해, 저장되는 항목은 발행일, 제목, 링크로 구성됨. 
        &amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt; 
&amp;lt;div class=&amp;quot;tx-flow-step&amp;quot;&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-left&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-number&amp;quot;&amp;gt;5&amp;lt;/div&amp;gt;
    &amp;lt;/div&amp;gt; 
    &amp;lt;div class=&amp;quot;tx-flow-content&amp;quot;&amp;gt; 
        &amp;lt;div class=&amp;quot;tx-flow-box&amp;quot;&amp;gt; 
            &amp;lt;strong&amp;gt;디버깅을 위한 출력 처리:&amp;lt;/strong&amp;gt; 처리 완료된 파일 수와 항목 수를 요약 출력, 스크립트가 정상 실행되었는지 확인 가능. 
        &amp;lt;/div&amp;gt; 
    &amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt; 
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;div class=&quot;tx-flow-container&quot;&gt;
&lt;div class=&quot;tx-flow-container&quot;&gt;
&lt;style&gt; 
/* 티스토리 스타일 충돌 방지를 위한 리셋 */
.tx-flow-container,
.tx-flow-container * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

.tx-flow-container { 
    width: 100% !important; 
    max-width: 100% !important; /* 최대 넓이를 100%로 설정 */
    margin: 0 auto 20px;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; 
    position: relative;
    overflow: visible !important; /* 오버플로우 제거 */
} 

.tx-flow-step { 
    display: flex; 
    margin-bottom: 30px; 
    position: relative; 
    width: 100%;
} 

.tx-flow-left { 
    position: relative; 
    width: 30px; 
    min-width: 30px; /* 최소 너비 보장 */
    flex-shrink: 0; 
    margin-right: 15px; 
} 

/* 중앙 연결선 스타일 */
.tx-flow-main-line {
    position: absolute;
    left: 14px;
    top: 0;
    width: 2px;
    height: 100%;
    background-color: #5D7FEB;
    z-index: 1;
}

.tx-flow-number { 
    background-color: #5D7FEB !important; /* 티스토리 스타일 오버라이드 */
    color: white !important; 
    width: 30px !important; 
    height: 30px !important; 
    border-radius: 4px !important; 
    display: flex !important; 
    align-items: center !important; 
    justify-content: center !important; 
    font-weight: bold !important; 
    position: absolute !important; 
    top: 50% !important; 
    transform: translateY(-50%) !important; 
    z-index: 2 !important;
    font-size: 14px !important;
    line-height: 1 !important;
    text-align: center !important;
} 

.tx-flow-content { 
    flex-grow: 1; 
    min-width: 0; /* 너비 오버플로우 방지 */
    width: calc(100% - 45px) !important; /* 왼쪽 영역을 제외한 너비 계산 */
} 

.tx-flow-box { 
    background-color: #f0f4f8 !important; 
    border-left: 4px solid #5D7FEB !important; 
    border-radius: 5px !important; 
    padding: 15px !important; 
    color: #444 !important; 
    line-height: 1.5 !important;
    word-wrap: break-word !important; /* 긴 텍스트 줄바꿈 */
    width: 100% !important;
    max-width: 100% !important;
} 

/* 모바일 화면 대응 */
@media (max-width: 640px) {
    .tx-flow-container {
        padding: 0 5px !important;
        width: calc(100% - 10px) !important;
    }
    
    .tx-flow-box {
        padding: 10px !important;
    }
    
    .tx-flow-left {
        margin-right: 10px;
    }
}

/* 텍스트 박스 컨테이너 스타일 오버라이드 */
.txc-textbox .tx-flow-container {
    width: 100% !important;
    max-width: 100% !important;
    overflow: visible !important;
}

.entry-content .tx-flow-container {
    width: 100% !important;
    max-width: 100% !important;
    overflow: visible !important;
}
&lt;/style&gt;
&lt;!-- 중앙 연결선 추가 --&gt;
&lt;div style=&quot;position: absolute; left: 14px; top: 15px; width: 2px; height: calc(100% - 30px); background-color: #5d7feb; z-index: 1;&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;tx-flow-step&quot;&gt;
&lt;div class=&quot;tx-flow-left&quot;&gt;
&lt;div class=&quot;tx-flow-number&quot;&gt;1&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-content&quot;&gt;
&lt;div class=&quot;tx-flow-box&quot;&gt;&lt;b&gt;입력 폴더 처리:&lt;/b&gt; 지정된 폴더 내에 있는 모든 *.xml 파일들을 순회하면서 하나씩 읽고 처리. 명령줄 인자로 경로를 입력받아, Powerautomate 자동화 흐름에 쉽게 통합 가능.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-step&quot;&gt;
&lt;div class=&quot;tx-flow-left&quot;&gt;
&lt;div class=&quot;tx-flow-number&quot;&gt;2&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-content&quot;&gt;
&lt;div class=&quot;tx-flow-box&quot;&gt;&lt;b&gt;Feedparser를 활용한 XML 구조 해석:&lt;/b&gt; XML 파일을 feedparser로 읽고, 내부에 있는 각 뉴스 항목의 제목, 링크, 발행일 등을 추출. 복잡한 XML 트리를 Powerautomate의 정규식을 이용해서 직접 다룰 필요 없음.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-step&quot;&gt;
&lt;div class=&quot;tx-flow-left&quot;&gt;
&lt;div class=&quot;tx-flow-number&quot;&gt;3&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-content&quot;&gt;
&lt;div class=&quot;tx-flow-box&quot;&gt;&lt;b&gt;시간 정보 처리 (UTC &amp;rarr; KST 변환):&lt;/b&gt; RSS 피드에 UTC 기준으로 기록된 발행 시간을 &lt;b&gt;한국 표준시(KST, UTC+9)&lt;/b&gt;로 변환.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-step&quot;&gt;
&lt;div class=&quot;tx-flow-left&quot;&gt;
&lt;div class=&quot;tx-flow-number&quot;&gt;4&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-content&quot;&gt;
&lt;div class=&quot;tx-flow-box&quot;&gt;&lt;b&gt;CSV 저장:&lt;/b&gt; 각 XML 파일에 대해 [파일명]_google_url.csv 형태로 결과를 저장. 다음 작업 연계를 위해, 저장되는 항목은 발행일, 제목, 링크로 구성됨.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-step&quot;&gt;
&lt;div class=&quot;tx-flow-left&quot;&gt;
&lt;div class=&quot;tx-flow-number&quot;&gt;5&lt;/div&gt;
&lt;/div&gt;
&lt;div class=&quot;tx-flow-content&quot;&gt;
&lt;div class=&quot;tx-flow-box&quot;&gt;&lt;b&gt;디버깅을 위한 출력 처리:&lt;/b&gt; 처리 완료된 파일 수와 항목 수를 요약 출력, 스크립트가 정상 실행되었는지 확인 가능.&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아래의 최종 코드는 실제 파일 단위로 저장된 XML을 처리하고 결과를 CSV로 정리하는 &lt;b&gt;파이썬 스크립트&lt;/b&gt;다. 단독으로 실행할 수 있을 뿐만 아니라, Powerautomate와 연동해 자동화 흐름의 일부로 동작시킬 수 있다. 주요 구성은 다음과 같다:&lt;/p&gt;
&lt;div id=&quot;code_1747228093073&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;style&amp;gt;
  .code-container {
    width: 100%;
    max-width: 100%;
    margin: 20px 0;
    border: 1px solid #e8e8e8;
    border-radius: 4px;
    background-color: #f0f4f8;
  }
  
  .code-summary {
    padding: 10px 15px;
    cursor: pointer;
    font-weight: bold;
    color: #4a5568;
    display: flex;
    justify-content: space-between;
    align-items: center;
    user-select: none;
  }
  
  .code-toggle {
    color: #718096;
    font-weight: normal;
  }
  
  .code-content {
    background-color: #f8fafc;
    padding: 15px;
    border-radius: 0 0 4px 4px;
    overflow-x: auto;
    max-width: 100%;
  }
  
  .code-pre {
    margin: 0;
    padding: 0;
    font-family: Consolas, Monaco, 'Courier New', monospace;
    font-size: 14px;
    line-height: 1.5;
    color: #2d3748;
    white-space: pre-wrap;
    word-break: break-all;
    width: 100%;
  }
  
  /* 색상 스타일 유지 */
  .blue { color: #0000CD; }
  .green { color: #008000; }
  .purple { color: #795E26; }
  .red { color: #A31515; }
  
  /* details 스타일 조정 */
  details {
    width: 100%;
  }
  
  details summary {
    outline: none;
  }
&amp;lt;/style&amp;gt;

&amp;lt;div class=&amp;quot;code-container&amp;quot;&amp;gt;
  &amp;lt;details&amp;gt;
    &amp;lt;summary class=&amp;quot;code-summary&amp;quot;&amp;gt;
      &amp;lt;span&amp;gt;google_feed_parser.py&amp;lt;/span&amp;gt;
      &amp;lt;span class=&amp;quot;code-toggle&amp;quot;&amp;gt;[펼치기/접기]&amp;lt;/span&amp;gt;
    &amp;lt;/summary&amp;gt;
    &amp;lt;div class=&amp;quot;code-content&amp;quot;&amp;gt;
      &amp;lt;pre class=&amp;quot;code-pre&amp;quot;&amp;gt;
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;#!/usr/bin/env python3&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;# -*- coding: utf-8 -*-&amp;lt;/span&amp;gt;

&amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;
구글 뉴스 XML/RSS/TXT 피드 파서
다양한 형식의 구글 뉴스 피드 파일을 CSV 파일로 변환합니다.

사용법: python google_feed_parser.py [입력_폴더] [출력_폴더]
출력 폴더를 지정하지 않으면 입력 폴더와 동일한 위치에 저장됩니다.
예시: python google_feed_parser.py xml_files
      python google_feed_parser.py xml_files csv_files
&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; feedparser
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; os
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; csv
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; sys
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; datetime &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; datetime, timezone, timedelta
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;from&amp;lt;/span&amp;gt; email.utils &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;import&amp;lt;/span&amp;gt; parsedate_to_datetime

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;main&amp;lt;/span&amp;gt;():
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 명령줄 인수 처리&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(sys.argv) &amp;gt;= 2:
        input_folder = sys.argv[1]
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 출력 폴더가 지정되지 않은 경우 입력 폴더와 동일하게 설정&amp;lt;/span&amp;gt;
        output_folder = sys.argv[2] &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(sys.argv) &amp;gt;= 3 &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; input_folder
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;사용법: python google_feed_parser.py [입력_폴더] [출력_폴더]&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;출력 폴더를 지정하지 않으면 입력 폴더와 동일한 위치에 저장됩니다.&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;예시: python google_feed_parser.py xml_files&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;      python google_feed_parser.py xml_files csv_files&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(1)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 입력 폴더 확인&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; os.path.exists(input_folder):
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;오류: 입력 폴더를 찾을 수 없습니다: {input_folder}&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(1)
    
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; os.path.isdir(input_folder):
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;오류: 입력 경로가 폴더가 아닙니다: {input_folder}&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(1)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 출력 폴더가 입력 폴더와 다른 경우에만 생성&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; output_folder != input_folder &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;and&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; os.path.exists(output_folder):
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
            os.makedirs(output_folder)
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;출력 폴더를 생성했습니다: {output_folder}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;오류: 출력 폴더를 생성할 수 없습니다: {output_folder} - {str(e)}&amp;quot;&amp;lt;/span&amp;gt;)
            sys.exit(1)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 파일 처리 시작&amp;lt;/span&amp;gt;
    process_folder(input_folder, output_folder)

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;process_folder&amp;lt;/span&amp;gt;(input_folder, output_folder):
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;지정된 폴더의 모든 피드 파일을 처리합니다.&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 한국 시간대 (UTC+9)&amp;lt;/span&amp;gt;
    KST = timezone(timedelta(hours=9))
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 처리 결과 카운터&amp;lt;/span&amp;gt;
    successful_files = 0
    failed_files = 0
    skipped_files = 0
    total_entries = 0
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 지원되는 확장자 목록 (txt 포함)&amp;lt;/span&amp;gt;
    supported_extensions = [&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'.xml'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'.rss'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'.atom'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'.feed'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'.txt'&amp;lt;/span&amp;gt;]
    
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;폴더 '{input_folder}'에서 피드 파일 검색 중...&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 폴더 내 모든 파일 나열&amp;lt;/span&amp;gt;
    all_files = os.listdir(input_folder)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;총 {len(all_files)}개 파일 발견&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 폴더 내 모든 파일 처리&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; filename &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; all_files:
        file_path = os.path.join(input_folder, filename)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 파일인지 확인&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; os.path.isfile(file_path):
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 파일 확장자 확인&amp;lt;/span&amp;gt;
            _, ext = os.path.splitext(filename)
            
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파일 검사 중: {filename} (확장자: {ext})&amp;quot;&amp;lt;/span&amp;gt;)
            
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 지원되는 확장자인 경우&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; ext.lower() &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; supported_extensions:
                &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;지원되는 확장자: {ext} - 처리 시도&amp;quot;&amp;lt;/span&amp;gt;)
                
                &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 피드 파일 처리&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; process_feed_file(file_path, output_folder, KST):
                    successful_files += 1
                    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 항목 수 추가&amp;lt;/span&amp;gt;
                    feed = feedparser.parse(file_path)
                    total_entries += &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(feed.entries)
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    failed_files += 1
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 지원되지 않는 확장자지만 유효한 피드인지 확인&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;지원되지 않는 확장자: {ext} - 유효성 확인 중&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; is_valid_feed(file_path):
                    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;유효한 피드로 확인됨: {filename} - 처리 시도&amp;quot;&amp;lt;/span&amp;gt;)
                    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; process_feed_file(file_path, output_folder, KST):
                        successful_files += 1
                        feed = feedparser.parse(file_path)
                        total_entries += &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(feed.entries)
                    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                        failed_files += 1
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    skipped_files += 1
                    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;건너뜀: {filename} (지원되지 않는 파일 형식)&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 결과 요약 출력&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;\n===== 처리 결과 =====&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;처리된 피드 파일: {successful_files}개&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;실패한 피드 파일: {failed_files}개&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;건너뛴 파일: {skipped_files}개&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;처리된 총 항목 수: {total_entries}개&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;CSV 파일 저장 위치: {output_folder}&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;모든 시간은 한국 시간(KST, UTC+9) 기준으로 변환되었습니다.&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;모든 CSV 파일은 '[원본파일명]_google_url.csv' 형식으로 저장되었습니다.&amp;quot;&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;지원되는 파일 확장자: {', '.join(supported_extensions)}&amp;quot;&amp;lt;/span&amp;gt;)

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;format_date_to_kst&amp;lt;/span&amp;gt;(date_str, KST):
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;날짜 문자열을 한국 시간으로 변환합니다.&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# RSS 날짜 형식(RFC 822)을 파싱&amp;lt;/span&amp;gt;
        dt = parsedate_to_datetime(date_str)
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# UTC에서 KST로 변환&amp;lt;/span&amp;gt;
        dt_kst = dt.astimezone(KST)
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 연도, 월, 날짜, 시간(24시간) 형식으로 변환&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; dt_kst.strftime(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'%Y-%m-%d %H:%M:%S'&amp;lt;/span&amp;gt;)
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception:
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# ISO 8601 형식 시도&amp;lt;/span&amp;gt;
            dt = datetime.fromisoformat(date_str.replace(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'Z'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'+00:00'&amp;lt;/span&amp;gt;))
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# UTC에서 KST로 변환&amp;lt;/span&amp;gt;
            dt_kst = dt.astimezone(KST)
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; dt_kst.strftime(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'%Y-%m-%d %H:%M:%S'&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 변환 실패 시 원본 반환&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; date_str

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;is_valid_feed&amp;lt;/span&amp;gt;(file_path):
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;파일이 유효한 피드인지 확인합니다.&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 파일 내용 읽기 (디버깅 출력)&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;open&amp;lt;/span&amp;gt;(file_path, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'r'&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;, errors=&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'ignore'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; f:
                content = f.read(1000)  &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 처음 1000자만 읽기&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파일 확인: {file_path}&amp;quot;&amp;lt;/span&amp;gt;)
                &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파일 미리보기: {content[:100]}...&amp;quot;&amp;lt;/span&amp;gt;)  &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 처음 100자 출력&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파일 읽기 오류: {file_path} - {str(e)}&amp;quot;&amp;lt;/span&amp;gt;)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 피드 파싱 시도&amp;lt;/span&amp;gt;
        feed = feedparser.parse(file_path)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 디버깅 출력&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파싱 결과: version={feed.get('version', 'None')}, entries={len(feed.get('entries', []))}&amp;quot;&amp;lt;/span&amp;gt;)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 피드에 entries가 있으면 유효한 피드로 간주&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(feed, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'entries'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;and&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(feed.entries) &amp;gt; 0
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;파일 유효성 확인 오류: {file_path} - {str(e)}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;

&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;def&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;process_feed_file&amp;lt;/span&amp;gt;(feed_file_path, output_folder, KST):
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;&amp;quot;&amp;quot;&amp;quot;피드 파일을 처리하여 CSV 파일로 저장합니다.&amp;quot;&amp;quot;&amp;quot;&amp;lt;/span&amp;gt;
    &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 출력 CSV 파일 경로 생성 (피드 파일명 + google_url)&amp;lt;/span&amp;gt;
    base_filename = os.path.splitext(os.path.basename(feed_file_path))[0]
    csv_file_path = os.path.join(output_folder, base_filename + &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;_google_url.csv&amp;quot;&amp;lt;/span&amp;gt;)
    
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 피드 파일 파싱&amp;lt;/span&amp;gt;
        feed = feedparser.parse(feed_file_path)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 피드 기본 정보 (디버깅)&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(feed, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'feed'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;and&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(feed.feed, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'title'&amp;lt;/span&amp;gt;):
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;피드 제목: {feed.feed.title}&amp;quot;&amp;lt;/span&amp;gt;)
        
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 항목이 없으면 건너뛰기&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;not&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(feed, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'entries'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;or&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;len&amp;lt;/span&amp;gt;(feed.entries) == 0:
            &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;건너뜀: {feed_file_path} 파일에 유효한 피드 항목이 없습니다.&amp;quot;&amp;lt;/span&amp;gt;)
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;
            
        &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# CSV 파일로 데이터 저장&amp;lt;/span&amp;gt;
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;with&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;open&amp;lt;/span&amp;gt;(csv_file_path, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'w'&amp;lt;/span&amp;gt;, newline=&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;''&amp;lt;/span&amp;gt;, encoding=&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'utf-8'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; csvfile:
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# CSV 작성자 생성&amp;lt;/span&amp;gt;
            csv_writer = csv.writer(csvfile)
            
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 헤더 작성&amp;lt;/span&amp;gt;
            csv_writer.writerow([&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'발행일(KST)'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'제목'&amp;lt;/span&amp;gt;, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'링크'&amp;lt;/span&amp;gt;])
            
            &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 각 항목(기사) 정보를 CSV에 저장&amp;lt;/span&amp;gt;
            &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;for&amp;lt;/span&amp;gt; entry &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;in&amp;lt;/span&amp;gt; feed.entries:
                title = entry.title &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(entry, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'title'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;제목 없음&amp;quot;&amp;lt;/span&amp;gt;
                link = entry.link &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(entry, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'link'&amp;lt;/span&amp;gt;) &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;링크 없음&amp;quot;&amp;lt;/span&amp;gt;
                
                &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 발행일 형식 변환 (KST 적용)&amp;lt;/span&amp;gt;
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;hasattr&amp;lt;/span&amp;gt;(entry, &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;'published'&amp;lt;/span&amp;gt;):
                    published = format_date_to_kst(entry.published, KST)
                &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;else&amp;lt;/span&amp;gt;:
                    published = &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;발행일 없음&amp;quot;&amp;lt;/span&amp;gt;
                
                &amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# CSV 파일에 데이터 저장&amp;lt;/span&amp;gt;
                csv_writer.writerow([published, title, link])
        
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;처리 완료: {feed_file_path} -&amp;gt; {csv_file_path} (항목 {len(feed.entries)}개)&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;True&amp;lt;/span&amp;gt;
    
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;오류 발생: {feed_file_path} 처리 중 - {str(e)}&amp;quot;&amp;lt;/span&amp;gt;)
        &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;return&amp;lt;/span&amp;gt; &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;False&amp;lt;/span&amp;gt;

&amp;lt;span class=&amp;quot;green&amp;quot;&amp;gt;# 메인 함수 실행&amp;lt;/span&amp;gt;
&amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;if&amp;lt;/span&amp;gt; __name__ == &amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;__main__&amp;quot;&amp;lt;/span&amp;gt;:
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;try&amp;lt;/span&amp;gt;:
        main()
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; KeyboardInterrupt:
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;&amp;quot;\n사용자에 의해 프로그램이 중단되었습니다.&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(0)
    &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;except&amp;lt;/span&amp;gt; Exception &amp;lt;span class=&amp;quot;blue&amp;quot;&amp;gt;as&amp;lt;/span&amp;gt; e:
        &amp;lt;span class=&amp;quot;purple&amp;quot;&amp;gt;print&amp;lt;/span&amp;gt;(&amp;lt;span class=&amp;quot;red&amp;quot;&amp;gt;f&amp;quot;\n예상치 못한 오류가 발생했습니다: {str(e)}&amp;quot;&amp;lt;/span&amp;gt;)
        sys.exit(1)&amp;lt;/pre&amp;gt;
    &amp;lt;/div&amp;gt;
  &amp;lt;/details&amp;gt;
&amp;lt;/div&amp;gt;&quot;&gt;
&lt;style&gt;
  .code-container {
    width: 100%;
    max-width: 100%;
    margin: 20px 0;
    border: 1px solid #e8e8e8;
    border-radius: 4px;
    background-color: #f0f4f8;
  }
  
  .code-summary {
    padding: 10px 15px;
    cursor: pointer;
    font-weight: bold;
    color: #4a5568;
    display: flex;
    justify-content: space-between;
    align-items: center;
    user-select: none;
  }
  
  .code-toggle {
    color: #718096;
    font-weight: normal;
  }
  
  .code-content {
    background-color: #f8fafc;
    padding: 15px;
    border-radius: 0 0 4px 4px;
    overflow-x: auto;
    max-width: 100%;
  }
  
  .code-pre {
    margin: 0;
    padding: 0;
    font-family: Consolas, Monaco, 'Courier New', monospace;
    font-size: 14px;
    line-height: 1.5;
    color: #2d3748;
    white-space: pre-wrap;
    word-break: break-all;
    width: 100%;
  }
  
  /* 색상 스타일 유지 */
  .blue { color: #0000CD; }
  .green { color: #008000; }
  .purple { color: #795E26; }
  .red { color: #A31515; }
  
  /* details 스타일 조정 */
  details {
    width: 100%;
  }
  
  details summary {
    outline: none;
  }
&lt;/style&gt;
&lt;div class=&quot;code-container&quot;&gt;&lt;details&gt;
&lt;summary class=&quot;code-summary&quot;&gt;&lt;span&gt;google_feed_parser.py&lt;/span&gt; &lt;span class=&quot;code-toggle&quot;&gt;[펼치기/접기]&lt;/span&gt;&lt;/summary&gt;
&lt;div class=&quot;code-content&quot;&gt;
&lt;pre class=&quot;code-pre&quot;&gt;&lt;span class=&quot;blue&quot;&gt;#!/usr/bin/env python3&lt;/span&gt;
&lt;span class=&quot;blue&quot;&gt;# -*- coding: utf-8 -*-&lt;/span&gt;

&lt;span class=&quot;green&quot;&gt;&quot;&quot;&quot;
구글 뉴스 XML/RSS/TXT 피드 파서
다양한 형식의 구글 뉴스 피드 파일을 CSV 파일로 변환합니다.

사용법: python google_feed_parser.py [입력_폴더] [출력_폴더]
출력 폴더를 지정하지 않으면 입력 폴더와 동일한 위치에 저장됩니다.
예시: python google_feed_parser.py xml_files
      python google_feed_parser.py xml_files csv_files
&quot;&quot;&quot;&lt;/span&gt;

&lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; feedparser
&lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; os
&lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; csv
&lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; sys
&lt;span class=&quot;blue&quot;&gt;from&lt;/span&gt; datetime &lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; datetime, timezone, timedelta
&lt;span class=&quot;blue&quot;&gt;from&lt;/span&gt; email.utils &lt;span class=&quot;blue&quot;&gt;import&lt;/span&gt; parsedate_to_datetime

&lt;span class=&quot;blue&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;main&lt;/span&gt;():
    &lt;span class=&quot;green&quot;&gt;# 명령줄 인수 처리&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(sys.argv) &amp;gt;= 2:
        input_folder = sys.argv[1]
        &lt;span class=&quot;green&quot;&gt;# 출력 폴더가 지정되지 않은 경우 입력 폴더와 동일하게 설정&lt;/span&gt;
        output_folder = sys.argv[2] &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(sys.argv) &amp;gt;= 3 &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt; input_folder
    &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;사용법: python google_feed_parser.py [입력_폴더] [출력_폴더]&quot;&lt;/span&gt;)
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;출력 폴더를 지정하지 않으면 입력 폴더와 동일한 위치에 저장됩니다.&quot;&lt;/span&gt;)
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;예시: python google_feed_parser.py xml_files&quot;&lt;/span&gt;)
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;      python google_feed_parser.py xml_files csv_files&quot;&lt;/span&gt;)
        sys.exit(1)
    
    &lt;span class=&quot;green&quot;&gt;# 입력 폴더 확인&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;not&lt;/span&gt; os.path.exists(input_folder):
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;오류: 입력 폴더를 찾을 수 없습니다: {input_folder}&quot;&lt;/span&gt;)
        sys.exit(1)
    
    &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;not&lt;/span&gt; os.path.isdir(input_folder):
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;오류: 입력 경로가 폴더가 아닙니다: {input_folder}&quot;&lt;/span&gt;)
        sys.exit(1)
    
    &lt;span class=&quot;green&quot;&gt;# 출력 폴더가 입력 폴더와 다른 경우에만 생성&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; output_folder != input_folder &lt;span class=&quot;blue&quot;&gt;and&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;not&lt;/span&gt; os.path.exists(output_folder):
        &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
            os.makedirs(output_folder)
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;출력 폴더를 생성했습니다: {output_folder}&quot;&lt;/span&gt;)
        &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; e:
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;오류: 출력 폴더를 생성할 수 없습니다: {output_folder} - {str(e)}&quot;&lt;/span&gt;)
            sys.exit(1)
    
    &lt;span class=&quot;green&quot;&gt;# 파일 처리 시작&lt;/span&gt;
    process_folder(input_folder, output_folder)

&lt;span class=&quot;blue&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;process_folder&lt;/span&gt;(input_folder, output_folder):
    &lt;span class=&quot;green&quot;&gt;&quot;&quot;&quot;지정된 폴더의 모든 피드 파일을 처리합니다.&quot;&quot;&quot;&lt;/span&gt;
    
    &lt;span class=&quot;green&quot;&gt;# 한국 시간대 (UTC+9)&lt;/span&gt;
    KST = timezone(timedelta(hours=9))
    
    &lt;span class=&quot;green&quot;&gt;# 처리 결과 카운터&lt;/span&gt;
    successful_files = 0
    failed_files = 0
    skipped_files = 0
    total_entries = 0
    
    &lt;span class=&quot;green&quot;&gt;# 지원되는 확장자 목록 (txt 포함)&lt;/span&gt;
    supported_extensions = [&lt;span class=&quot;red&quot;&gt;'.xml'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'.rss'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'.atom'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'.feed'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'.txt'&lt;/span&gt;]
    
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;폴더 '{input_folder}'에서 피드 파일 검색 중...&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;green&quot;&gt;# 폴더 내 모든 파일 나열&lt;/span&gt;
    all_files = os.listdir(input_folder)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;총 {len(all_files)}개 파일 발견&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;green&quot;&gt;# 폴더 내 모든 파일 처리&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;for&lt;/span&gt; filename &lt;span class=&quot;blue&quot;&gt;in&lt;/span&gt; all_files:
        file_path = os.path.join(input_folder, filename)
        
        &lt;span class=&quot;green&quot;&gt;# 파일인지 확인&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; os.path.isfile(file_path):
            &lt;span class=&quot;green&quot;&gt;# 파일 확장자 확인&lt;/span&gt;
            _, ext = os.path.splitext(filename)
            
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파일 검사 중: {filename} (확장자: {ext})&quot;&lt;/span&gt;)
            
            &lt;span class=&quot;green&quot;&gt;# 지원되는 확장자인 경우&lt;/span&gt;
            &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; ext.lower() &lt;span class=&quot;blue&quot;&gt;in&lt;/span&gt; supported_extensions:
                &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;지원되는 확장자: {ext} - 처리 시도&quot;&lt;/span&gt;)
                
                &lt;span class=&quot;green&quot;&gt;# 피드 파일 처리&lt;/span&gt;
                &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; process_feed_file(file_path, output_folder, KST):
                    successful_files += 1
                    &lt;span class=&quot;green&quot;&gt;# 항목 수 추가&lt;/span&gt;
                    feed = feedparser.parse(file_path)
                    total_entries += &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(feed.entries)
                &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
                    failed_files += 1
            &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
                &lt;span class=&quot;green&quot;&gt;# 지원되지 않는 확장자지만 유효한 피드인지 확인&lt;/span&gt;
                &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;지원되지 않는 확장자: {ext} - 유효성 확인 중&quot;&lt;/span&gt;)
                &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; is_valid_feed(file_path):
                    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;유효한 피드로 확인됨: {filename} - 처리 시도&quot;&lt;/span&gt;)
                    &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; process_feed_file(file_path, output_folder, KST):
                        successful_files += 1
                        feed = feedparser.parse(file_path)
                        total_entries += &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(feed.entries)
                    &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
                        failed_files += 1
                &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
                    skipped_files += 1
                    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;건너뜀: {filename} (지원되지 않는 파일 형식)&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;green&quot;&gt;# 결과 요약 출력&lt;/span&gt;
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;\n===== 처리 결과 =====&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;처리된 피드 파일: {successful_files}개&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;실패한 피드 파일: {failed_files}개&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;건너뛴 파일: {skipped_files}개&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;처리된 총 항목 수: {total_entries}개&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;CSV 파일 저장 위치: {output_folder}&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;모든 시간은 한국 시간(KST, UTC+9) 기준으로 변환되었습니다.&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;모든 CSV 파일은 '[원본파일명]_google_url.csv' 형식으로 저장되었습니다.&quot;&lt;/span&gt;)
    &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;지원되는 파일 확장자: {', '.join(supported_extensions)}&quot;&lt;/span&gt;)

&lt;span class=&quot;blue&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;format_date_to_kst&lt;/span&gt;(date_str, KST):
    &lt;span class=&quot;green&quot;&gt;&quot;&quot;&quot;날짜 문자열을 한국 시간으로 변환합니다.&quot;&quot;&quot;&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;green&quot;&gt;# RSS 날짜 형식(RFC 822)을 파싱&lt;/span&gt;
        dt = parsedate_to_datetime(date_str)
        &lt;span class=&quot;green&quot;&gt;# UTC에서 KST로 변환&lt;/span&gt;
        dt_kst = dt.astimezone(KST)
        &lt;span class=&quot;green&quot;&gt;# 연도, 월, 날짜, 시간(24시간) 형식으로 변환&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; dt_kst.strftime(&lt;span class=&quot;red&quot;&gt;'%Y-%m-%d %H:%M:%S'&lt;/span&gt;)
    &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception:
        &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
            &lt;span class=&quot;green&quot;&gt;# ISO 8601 형식 시도&lt;/span&gt;
            dt = datetime.fromisoformat(date_str.replace(&lt;span class=&quot;red&quot;&gt;'Z'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'+00:00'&lt;/span&gt;))
            &lt;span class=&quot;green&quot;&gt;# UTC에서 KST로 변환&lt;/span&gt;
            dt_kst = dt.astimezone(KST)
            &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; dt_kst.strftime(&lt;span class=&quot;red&quot;&gt;'%Y-%m-%d %H:%M:%S'&lt;/span&gt;)
        &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt;:
            &lt;span class=&quot;green&quot;&gt;# 변환 실패 시 원본 반환&lt;/span&gt;
            &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; date_str

&lt;span class=&quot;blue&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;is_valid_feed&lt;/span&gt;(file_path):
    &lt;span class=&quot;green&quot;&gt;&quot;&quot;&quot;파일이 유효한 피드인지 확인합니다.&quot;&quot;&quot;&lt;/span&gt;
    &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;green&quot;&gt;# 파일 내용 읽기 (디버깅 출력)&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
            &lt;span class=&quot;blue&quot;&gt;with&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;open&lt;/span&gt;(file_path, &lt;span class=&quot;red&quot;&gt;'r'&lt;/span&gt;, encoding=&lt;span class=&quot;red&quot;&gt;'utf-8'&lt;/span&gt;, errors=&lt;span class=&quot;red&quot;&gt;'ignore'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; f:
                content = f.read(1000)  &lt;span class=&quot;green&quot;&gt;# 처음 1000자만 읽기&lt;/span&gt;
                &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파일 확인: {file_path}&quot;&lt;/span&gt;)
                &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파일 미리보기: {content[:100]}...&quot;&lt;/span&gt;)  &lt;span class=&quot;green&quot;&gt;# 처음 100자 출력&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; e:
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파일 읽기 오류: {file_path} - {str(e)}&quot;&lt;/span&gt;)
        
        &lt;span class=&quot;green&quot;&gt;# 피드 파싱 시도&lt;/span&gt;
        feed = feedparser.parse(file_path)
        
        &lt;span class=&quot;green&quot;&gt;# 디버깅 출력&lt;/span&gt;
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파싱 결과: version={feed.get('version', 'None')}, entries={len(feed.get('entries', []))}&quot;&lt;/span&gt;)
        
        &lt;span class=&quot;green&quot;&gt;# 피드에 entries가 있으면 유효한 피드로 간주&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(feed, &lt;span class=&quot;red&quot;&gt;'entries'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;and&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(feed.entries) &amp;gt; 0
    &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;파일 유효성 확인 오류: {file_path} - {str(e)}&quot;&lt;/span&gt;)
        &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;False&lt;/span&gt;

&lt;span class=&quot;blue&quot;&gt;def&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;process_feed_file&lt;/span&gt;(feed_file_path, output_folder, KST):
    &lt;span class=&quot;green&quot;&gt;&quot;&quot;&quot;피드 파일을 처리하여 CSV 파일로 저장합니다.&quot;&quot;&quot;&lt;/span&gt;
    &lt;span class=&quot;green&quot;&gt;# 출력 CSV 파일 경로 생성 (피드 파일명 + google_url)&lt;/span&gt;
    base_filename = os.path.splitext(os.path.basename(feed_file_path))[0]
    csv_file_path = os.path.join(output_folder, base_filename + &lt;span class=&quot;red&quot;&gt;&quot;_google_url.csv&quot;&lt;/span&gt;)
    
    &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
        &lt;span class=&quot;green&quot;&gt;# 피드 파일 파싱&lt;/span&gt;
        feed = feedparser.parse(feed_file_path)
        
        &lt;span class=&quot;green&quot;&gt;# 피드 기본 정보 (디버깅)&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(feed, &lt;span class=&quot;red&quot;&gt;'feed'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;and&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(feed.feed, &lt;span class=&quot;red&quot;&gt;'title'&lt;/span&gt;):
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;피드 제목: {feed.feed.title}&quot;&lt;/span&gt;)
        
        &lt;span class=&quot;green&quot;&gt;# 항목이 없으면 건너뛰기&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;not&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(feed, &lt;span class=&quot;red&quot;&gt;'entries'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;or&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;len&lt;/span&gt;(feed.entries) == 0:
            &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;건너뜀: {feed_file_path} 파일에 유효한 피드 항목이 없습니다.&quot;&lt;/span&gt;)
            &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;False&lt;/span&gt;
            
        &lt;span class=&quot;green&quot;&gt;# CSV 파일로 데이터 저장&lt;/span&gt;
        &lt;span class=&quot;blue&quot;&gt;with&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;open&lt;/span&gt;(csv_file_path, &lt;span class=&quot;red&quot;&gt;'w'&lt;/span&gt;, newline=&lt;span class=&quot;red&quot;&gt;''&lt;/span&gt;, encoding=&lt;span class=&quot;red&quot;&gt;'utf-8'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; csvfile:
            &lt;span class=&quot;green&quot;&gt;# CSV 작성자 생성&lt;/span&gt;
            csv_writer = csv.writer(csvfile)
            
            &lt;span class=&quot;green&quot;&gt;# 헤더 작성&lt;/span&gt;
            csv_writer.writerow([&lt;span class=&quot;red&quot;&gt;'발행일(KST)'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'제목'&lt;/span&gt;, &lt;span class=&quot;red&quot;&gt;'링크'&lt;/span&gt;])
            
            &lt;span class=&quot;green&quot;&gt;# 각 항목(기사) 정보를 CSV에 저장&lt;/span&gt;
            &lt;span class=&quot;blue&quot;&gt;for&lt;/span&gt; entry &lt;span class=&quot;blue&quot;&gt;in&lt;/span&gt; feed.entries:
                title = entry.title &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(entry, &lt;span class=&quot;red&quot;&gt;'title'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;red&quot;&gt;&quot;제목 없음&quot;&lt;/span&gt;
                link = entry.link &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(entry, &lt;span class=&quot;red&quot;&gt;'link'&lt;/span&gt;) &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt; &lt;span class=&quot;red&quot;&gt;&quot;링크 없음&quot;&lt;/span&gt;
                
                &lt;span class=&quot;green&quot;&gt;# 발행일 형식 변환 (KST 적용)&lt;/span&gt;
                &lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; &lt;span class=&quot;purple&quot;&gt;hasattr&lt;/span&gt;(entry, &lt;span class=&quot;red&quot;&gt;'published'&lt;/span&gt;):
                    published = format_date_to_kst(entry.published, KST)
                &lt;span class=&quot;blue&quot;&gt;else&lt;/span&gt;:
                    published = &lt;span class=&quot;red&quot;&gt;&quot;발행일 없음&quot;&lt;/span&gt;
                
                &lt;span class=&quot;green&quot;&gt;# CSV 파일에 데이터 저장&lt;/span&gt;
                csv_writer.writerow([published, title, link])
        
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;처리 완료: {feed_file_path} -&amp;gt; {csv_file_path} (항목 {len(feed.entries)}개)&quot;&lt;/span&gt;)
        &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;True&lt;/span&gt;
    
    &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;오류 발생: {feed_file_path} 처리 중 - {str(e)}&quot;&lt;/span&gt;)
        &lt;span class=&quot;blue&quot;&gt;return&lt;/span&gt; &lt;span class=&quot;blue&quot;&gt;False&lt;/span&gt;

&lt;span class=&quot;green&quot;&gt;# 메인 함수 실행&lt;/span&gt;
&lt;span class=&quot;blue&quot;&gt;if&lt;/span&gt; __name__ == &lt;span class=&quot;red&quot;&gt;&quot;__main__&quot;&lt;/span&gt;:
    &lt;span class=&quot;blue&quot;&gt;try&lt;/span&gt;:
        main()
    &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; KeyboardInterrupt:
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;&quot;\n사용자에 의해 프로그램이 중단되었습니다.&quot;&lt;/span&gt;)
        sys.exit(0)
    &lt;span class=&quot;blue&quot;&gt;except&lt;/span&gt; Exception &lt;span class=&quot;blue&quot;&gt;as&lt;/span&gt; e:
        &lt;span class=&quot;purple&quot;&gt;print&lt;/span&gt;(&lt;span class=&quot;red&quot;&gt;f&quot;\n예상치 못한 오류가 발생했습니다: {str(e)}&quot;&lt;/span&gt;)
        sys.exit(1)&lt;/pre&gt;
&lt;/div&gt;
&lt;/details&gt;&lt;/div&gt;
&lt;/div&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p style=&quot;text-align: left;&quot; data-ke-size=&quot;size16&quot;&gt;파이썬 코드 실행 결과 예시&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bbyKJV/btsNXpknLvg/xRw76a0iye0VwSZPZ8O5K0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bbyKJV/btsNXpknLvg/xRw76a0iye0VwSZPZ8O5K0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bbyKJV/btsNXpknLvg/xRw76a0iye0VwSZPZ8O5K0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbbyKJV%2FbtsNXpknLvg%2FxRw76a0iye0VwSZPZ8O5K0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1115&quot; height=&quot;628&quot; data-origin-width=&quot;1115&quot; data-origin-height=&quot;628&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-end=&quot;181&quot; data-start=&quot;149&quot; data-ke-size=&quot;size23&quot;&gt;  Power Automate와 연동한 자동 실행&lt;/h3&gt;
&lt;p data-end=&quot;305&quot; data-start=&quot;183&quot; data-ke-size=&quot;size16&quot;&gt;아래 흐름은 &lt;b&gt;파이썬 스크립트&lt;/b&gt;를 Power Automate와 연동해 자동화를 진행한 예시다. Power Automate만 사용하면 약 5분이 걸리는 작업을 단 3초 만에 끝낼 수 있다.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;873&quot; data-origin-height=&quot;863&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/rCa9Z/btsNWnOddtl/mPHhkyzCZnOICKwdH3NFnk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/rCa9Z/btsNWnOddtl/mPHhkyzCZnOICKwdH3NFnk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/rCa9Z/btsNWnOddtl/mPHhkyzCZnOICKwdH3NFnk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FrCa9Z%2FbtsNWnOddtl%2FmPHhkyzCZnOICKwdH3NFnk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;873&quot; height=&quot;863&quot; data-origin-width=&quot;873&quot; data-origin-height=&quot;863&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;616&quot; data-origin-height=&quot;281&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/d3tqnu/btsNX3BcAYi/znijUaQyzdLJQCejolKis0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/d3tqnu/btsNX3BcAYi/znijUaQyzdLJQCejolKis0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/d3tqnu/btsNX3BcAYi/znijUaQyzdLJQCejolKis0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd3tqnu%2FbtsNX3BcAYi%2FznijUaQyzdLJQCejolKis0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;616&quot; height=&quot;281&quot; data-origin-width=&quot;616&quot; data-origin-height=&quot;281&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span&gt;마치며&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이번 글에서는 feedparser를 활용해 구글 뉴스 RSS에서 가져온 XML 파일을 정리하고, 기사 제목과 링크, 발행일을 빠르게 추출하는 방법을 살펴보았다. 다음 글에서는 Google 뉴스 주소를 원문 URL로 변환하는 방법을 소개하고자 한다.&lt;/p&gt;</description>
      <category>feedparser</category>
      <category>PowerAutomate</category>
      <category>웹스크래핑</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/13</guid>
      <comments>https://catalystmind.tistory.com/13#entry13comment</comments>
      <pubDate>Wed, 14 May 2025 20:22:32 +0900</pubDate>
    </item>
    <item>
      <title>Power Automate와 Python을 활용한 주도주 재료 분석 자동화 - 서론</title>
      <link>https://catalystmind.tistory.com/12</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;div id=&quot;code_1747140859252&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!-- HTML 시작 --&amp;gt;
&amp;lt;div class=&amp;quot;tldr-container&amp;quot; style=&amp;quot;margin: 20px 0;&amp;quot;&amp;gt;
  &amp;lt;style&amp;gt;
    .tldr-container .tldr-card {
      background-color: #f0f4f9;
      border-radius: 8px;
      width: 100%;
      border-left: 4px solid #4e7ed7;
      padding: 15px;
      margin: 15px 0;
      font-family: 'Noto Sans KR', 'Apple SD Gothic Neo', sans-serif;
    }
    
    .tldr-container .tldr-title {
      font-size: 20px;
      font-weight: 700;
      color: #1a3c75;
      margin-bottom: 15px;
    }
    
    .tldr-container .tldr-list {
      list-style-type: none;
      padding: 0;
      margin: 0;
    }
    
    .tldr-container .tldr-list li {
      position: relative;
      padding-left: 20px;
      margin-bottom: 14px;
      line-height: 1.5;
      color: #333;
      font-size: 16px;
    }
    
    /* 불릿 포인트 스타일 - before 의사 요소만 사용 */
    .tldr-container .tldr-list li:before {
      content: &amp;quot;&amp;bull;&amp;quot;;
      position: absolute;
      left: 0;
      color: #4e7ed7;
      font-weight: bold;
    }
    
    /* 티스토리 기본 불릿 스타일 제거 */
    .tldr-container .tldr-list li::marker {
      content: none;
    }
  &amp;lt;/style&amp;gt;
  
  &amp;lt;div class=&amp;quot;tldr-card&amp;quot;&amp;gt;
    &amp;lt;div class=&amp;quot;tldr-title&amp;quot;&amp;gt;TL;DR&amp;lt;/div&amp;gt;
    &amp;lt;ul class=&amp;quot;tldr-list&amp;quot;&amp;gt;
      &amp;lt;li&amp;gt;Power Automate만 사용한 기존 방식은 &amp;lt;strong&amp;gt;속도 제한(90분+), 높은 비용, 유지보수 어려움&amp;lt;/strong&amp;gt;이라는 한계에 직면&amp;lt;/li&amp;gt;
      &amp;lt;li&amp;gt;해결책으로 &amp;lt;strong&amp;gt;Power Automate와 Python 연동&amp;lt;/strong&amp;gt; 방식 제안 (Jupyter로 작성, PowerShell로 실행)&amp;lt;/li&amp;gt;
      &amp;lt;li&amp;gt;주요 도구: &amp;lt;strong&amp;gt;feedparser, googlenewsdecoder, trafilatura&amp;lt;/strong&amp;gt;를 활용한 뉴스 수집 및 정제 자동화&amp;lt;/li&amp;gt;
    &amp;lt;/ul&amp;gt;
  &amp;lt;/div&amp;gt;
&amp;lt;/div&amp;gt;
&amp;lt;!-- HTML 끝 --&amp;gt;&quot;&gt;&lt;!-- HTML 시작 --&gt;
&lt;div class=&quot;tldr-container&quot; style=&quot;margin: 20px 0;&quot;&gt;
&lt;style&gt;
    .tldr-container .tldr-card {
      background-color: #f0f4f9;
      border-radius: 8px;
      width: 100%;
      border-left: 4px solid #4e7ed7;
      padding: 15px;
      margin: 15px 0;
      font-family: 'Noto Sans KR', 'Apple SD Gothic Neo', sans-serif;
    }
    
    .tldr-container .tldr-title {
      font-size: 20px;
      font-weight: 700;
      color: #1a3c75;
      margin-bottom: 15px;
    }
    
    .tldr-container .tldr-list {
      list-style-type: none;
      padding: 0;
      margin: 0;
    }
    
    .tldr-container .tldr-list li {
      position: relative;
      padding-left: 20px;
      margin-bottom: 14px;
      line-height: 1.5;
      color: #333;
      font-size: 16px;
    }
    
    /* 불릿 포인트 스타일 - before 의사 요소만 사용 */
    .tldr-container .tldr-list li:before {
      content: &quot;•&quot;;
      position: absolute;
      left: 0;
      color: #4e7ed7;
      font-weight: bold;
    }
    
    /* 티스토리 기본 불릿 스타일 제거 */
    .tldr-container .tldr-list li::marker {
      content: none;
    }
  &lt;/style&gt;
&lt;div class=&quot;tldr-card&quot;&gt;
&lt;div class=&quot;tldr-title&quot;&gt;TL;DR&lt;/div&gt;
&lt;ul class=&quot;tldr-list&quot; style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Power Automate만 사용한 기존 방식은 &lt;b&gt;속도 제한(90분+), 높은 비용, 유지보수 어려움&lt;/b&gt;이라는 한계에 직면&lt;/li&gt;
&lt;li&gt;해결책으로 &lt;b&gt;Power Automate와 Python 연동&lt;/b&gt; 방식 제안 (Jupyter로 작성, PowerShell로 실행)&lt;/li&gt;
&lt;li&gt;주요 도구: &lt;b&gt;feedparser, googlenewsdecoder, trafilatura&lt;/b&gt;를 활용한 뉴스 수집 및 정제 자동화&lt;/li&gt;
&lt;/ul&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;!-- HTML 끝 --&gt;&lt;/div&gt;
&lt;h2 data-end=&quot;1765&quot; data-start=&quot;1740&quot; data-ke-size=&quot;size26&quot;&gt;Power Automate를 활용한 기존 방식의 개선 필요성&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;1284&quot; data-end=&quot;1412&quot;&gt;매일 한 종목과 관련된 기사만 해도 &lt;b&gt;종목당 평균 30개&lt;/b&gt; 정도가 발생한다. 관심 있는 종목이 10개만 되어도 하루에 300개의 기사가 쏟아지는 셈이다. 이 기사들이 모두 의미가 있는 것은 아니지만, 그중 의미 있는 기사를 가려내는 것만으로도 많은 시간이 들기 때문에 전업 투자자가 아닌 직장인이 일일이 읽고 분석하는 것은 사실상 불가능하다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;1414&quot; data-end=&quot;1557&quot;&gt;앞의 글에서 Microsoft Power Automate 같은 기존 도구로 자동화를 시도해 보았는데, 자동화 자체는 가능했지만 시간이 너무 많이 소요된다는 문제가 발생했다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;1559&quot; data-end=&quot;1738&quot;&gt;이 문제를 해결하기 위해 여러 가지 접근을 시도해 왔고, 그중 가장 실용적이고 효율적이었던 방법을 &lt;b&gt;Power Automate와 파이썬&lt;/b&gt; 조합에서 찾았다. 이 글은 앞으로 이어질 파이썬을 활용한 뉴스 자동화 시리즈의 첫 번째로, 재료 분석 자동화에 필요한 절차와 도구를 소개하고자 한다.&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;1740&quot; data-end=&quot;1765&quot;&gt;기존 방식의 한계&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;1767&quot; data-end=&quot;1873&quot;&gt;초기에는 Microsoft Power Automate를 기반으로 뉴스 기사를 수집하고, GPT를 활용해 본문을 정제하는 워크플로우를 구성했다. 하지만, 다음과 같은 문제에 곧 직면하게 되었다:&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot; data-start=&quot;1875&quot; data-end=&quot;2167&quot;&gt;
&lt;li data-start=&quot;1875&quot; data-end=&quot;2018&quot;&gt;&lt;b&gt;처리 속도 문제&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot; data-start=&quot;1893&quot; data-end=&quot;2018&quot;&gt;
&lt;li data-start=&quot;1893&quot; data-end=&quot;1943&quot;&gt;브라우저 기반 자동화는 기사 300개를 처리하는 데 &lt;b&gt;약 90분 이상&lt;/b&gt; 소요&lt;/li&gt;
&lt;li data-start=&quot;1946&quot; data-end=&quot;2018&quot;&gt;퇴근 후 데이터 수집을 시작하면 &lt;b&gt;분석할 시간 자체가 부족&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li data-start=&quot;2020&quot; data-end=&quot;2103&quot;&gt;&lt;b&gt;정제 비용 증가&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot; data-start=&quot;2038&quot; data-end=&quot;2103&quot;&gt;
&lt;li data-start=&quot;2038&quot; data-end=&quot;2103&quot;&gt;광고, 댓글, 관련 기사 링크 등이 함께 수집&lt;/li&gt;
&lt;li data-start=&quot;2038&quot; data-end=&quot;2103&quot;&gt;GPT를 통한 후처리에 &lt;b&gt;불필요한 토큰 비용 낭비&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li data-start=&quot;2105&quot; data-end=&quot;2167&quot;&gt;&lt;b&gt;유지보수 어려움&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot; data-start=&quot;2123&quot; data-end=&quot;2167&quot;&gt;
&lt;li data-start=&quot;2123&quot; data-end=&quot;2167&quot;&gt;언론사마다 HTML 구조가 다르고 본문의 위치가 다름&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 data-end=&quot;1765&quot; data-start=&quot;1740&quot; data-ke-size=&quot;size26&quot;&gt;주요 파이썬 패키지&lt;/h2&gt;
&lt;div id=&quot;code_1747140241540&quot; data-ke-type=&quot;html&quot; data-source=&quot;&amp;lt;!DOCTYPE html&amp;gt;
&amp;lt;html lang=&amp;quot;ko&amp;quot;&amp;gt;
&amp;lt;head&amp;gt;
    &amp;lt;meta charset=&amp;quot;UTF-8&amp;quot;&amp;gt;
    &amp;lt;meta name=&amp;quot;viewport&amp;quot; content=&amp;quot;width=device-width, initial-scale=1.0&amp;quot;&amp;gt;
    &amp;lt;title&amp;gt;주식 뉴스 수집 작업 단계&amp;lt;/title&amp;gt;
    &amp;lt;style&amp;gt;
        .table-container {
            margin: 20px auto;
            max-width: 900px;
            overflow-x: auto;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        
        .highlight-row {
            background-color: #f7fafc !important;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
            background-color: #fff;
            margin: 0;
        }
        
        thead {
            background-color: #4a5568 !important;
            color: white !important;
        }
        
        th, td {
            padding: 12px 16px;
            text-align: left;
            border: 1px solid #e2e8f0;
        }
        
        th {
            font-weight: bold;
            font-size: 16px;
            text-align: center;
            color: #ffffff !important;
        }
        
        tr:nth-child(even) {
            background-color: #f7fafc;
        }
        
        tr:hover {
            background-color: #edf2f7;
            transition: background-color 0.3s ease;
        }
        
        td {
            font-size: 15px;
        }
        
        .step-number {
            text-align: center;
            font-weight: bold;
            color: #4a5568;
        }
        
        .package-cell {
            text-align: center;
            font-family: 'Courier New', monospace;
            color: #2c5aa0;
        }
        
        a {
            color: #2c5aa0;
            text-decoration: none;
            font-weight: bold;
        }
        
        a:hover {
            text-decoration: underline;
        }
        
        @media (max-width: 768px) {
            .table-container {
                margin: 20px 10px;
            }
            
            th, td {
                padding: 8px 10px;
                font-size: 14px;
            }
            
            th {
                font-size: 15px;
            }
        }
    &amp;lt;/style&amp;gt;
&amp;lt;/head&amp;gt;
&amp;lt;body&amp;gt;
    &amp;lt;div class=&amp;quot;table-container&amp;quot;&amp;gt;
        &amp;lt;table&amp;gt;
            &amp;lt;thead&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;th&amp;gt;단계&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;작업 내용&amp;lt;/th&amp;gt;
                    &amp;lt;th&amp;gt;파이썬 패키지&amp;lt;/th&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/thead&amp;gt;
            &amp;lt;tbody&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;step-number&amp;quot;&amp;gt;1&amp;lt;/td&amp;gt;
                    &amp;lt;td&amp;gt;KRX 거래소에서 당일 거래량 상위 10개 종목 추출&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;package-cell&amp;quot;&amp;gt;Power Automate&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr class=&amp;quot;highlight-row&amp;quot;&amp;gt;
                    &amp;lt;td class=&amp;quot;step-number&amp;quot;&amp;gt;2&amp;lt;/td&amp;gt;
                    &amp;lt;td&amp;gt;종목별 구글 뉴스 RSS(XML) 수집&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;package-cell&amp;quot;&amp;gt;&amp;lt;a href=&amp;quot;https://pypi.org/project/feedparser/&amp;quot; target=&amp;quot;_blank&amp;quot;&amp;gt;feedparser&amp;lt;/a&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr&amp;gt;
                    &amp;lt;td class=&amp;quot;step-number&amp;quot;&amp;gt;3&amp;lt;/td&amp;gt;
                    &amp;lt;td&amp;gt;XML 정제 후 뉴스 URL 추출&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;package-cell&amp;quot;&amp;gt;&amp;lt;a href=&amp;quot;https://pypi.org/project/googlenewsdecoder/&amp;quot; target=&amp;quot;_blank&amp;quot;&amp;gt;googlenewsdecoder&amp;lt;/a&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
                &amp;lt;tr class=&amp;quot;highlight-row&amp;quot;&amp;gt;
                    &amp;lt;td class=&amp;quot;step-number&amp;quot;&amp;gt;4&amp;lt;/td&amp;gt;
                    &amp;lt;td&amp;gt;뉴스 URL로부터 원문 기사 내용 수집&amp;lt;/td&amp;gt;
                    &amp;lt;td class=&amp;quot;package-cell&amp;quot;&amp;gt;&amp;lt;a href=&amp;quot;https://pypi.org/project/trafilatura/&amp;quot; target=&amp;quot;_blank&amp;quot;&amp;gt;Trafilatura&amp;lt;/a&amp;gt;&amp;lt;/td&amp;gt;
                &amp;lt;/tr&amp;gt;
            &amp;lt;/tbody&amp;gt;
        &amp;lt;/table&amp;gt;
    &amp;lt;/div&amp;gt;
&amp;lt;/body&amp;gt;
&amp;lt;/html&amp;gt;&quot;&gt;
&lt;style&gt;
        .table-container {
            margin: 20px auto;
            max-width: 900px;
            overflow-x: auto;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        
        .highlight-row {
            background-color: #f7fafc !important;
        }
        
        table {
            width: 100%;
            border-collapse: collapse;
            background-color: #fff;
            margin: 0;
        }
        
        thead {
            background-color: #4a5568 !important;
            color: white !important;
        }
        
        th, td {
            padding: 12px 16px;
            text-align: left;
            border: 1px solid #e2e8f0;
        }
        
        th {
            font-weight: bold;
            font-size: 16px;
            text-align: center;
            color: #ffffff !important;
        }
        
        tr:nth-child(even) {
            background-color: #f7fafc;
        }
        
        tr:hover {
            background-color: #edf2f7;
            transition: background-color 0.3s ease;
        }
        
        td {
            font-size: 15px;
        }
        
        .step-number {
            text-align: center;
            font-weight: bold;
            color: #4a5568;
        }
        
        .package-cell {
            text-align: center;
            font-family: 'Courier New', monospace;
            color: #2c5aa0;
        }
        
        a {
            color: #2c5aa0;
            text-decoration: none;
            font-weight: bold;
        }
        
        a:hover {
            text-decoration: underline;
        }
        
        @media (max-width: 768px) {
            .table-container {
                margin: 20px 10px;
            }
            
            th, td {
                padding: 8px 10px;
                font-size: 14px;
            }
            
            th {
                font-size: 15px;
            }
        }
    &lt;/style&gt;
&lt;div class=&quot;table-container&quot;&gt;
&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;단계&lt;/th&gt;
&lt;th&gt;작업 내용&lt;/th&gt;
&lt;th&gt;파이썬 패키지&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td class=&quot;step-number&quot;&gt;1&lt;/td&gt;
&lt;td&gt;KRX 거래소에서 당일 거래량 상위 10개 종목 추출&lt;/td&gt;
&lt;td class=&quot;package-cell&quot;&gt;Power Automate&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;highlight-row&quot;&gt;
&lt;td class=&quot;step-number&quot;&gt;2&lt;/td&gt;
&lt;td&gt;종목별 구글 뉴스 RSS(XML) 수집&lt;/td&gt;
&lt;td class=&quot;package-cell&quot;&gt;&lt;a href=&quot;https://pypi.org/project/feedparser/&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;feedparser&lt;/a&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td class=&quot;step-number&quot;&gt;3&lt;/td&gt;
&lt;td&gt;XML 정제 후 뉴스 URL 추출&lt;/td&gt;
&lt;td class=&quot;package-cell&quot;&gt;&lt;a href=&quot;https://pypi.org/project/googlenewsdecoder/&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;googlenewsdecoder&lt;/a&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr class=&quot;highlight-row&quot;&gt;
&lt;td class=&quot;step-number&quot;&gt;4&lt;/td&gt;
&lt;td&gt;뉴스 URL로부터 원문 기사 내용 수집&lt;/td&gt;
&lt;td class=&quot;package-cell&quot;&gt;&lt;a href=&quot;https://pypi.org/project/trafilatura/&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;Trafilatura&lt;/a&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;
&lt;h2 data-end=&quot;1765&quot; data-start=&quot;1740&quot; data-ke-size=&quot;size26&quot;&gt;Power Automate는 파이썬을 지원하지만 문제가 있다&lt;/h2&gt;
&lt;p data-end=&quot;1873&quot; data-start=&quot;1767&quot; data-ke-size=&quot;size16&quot;&gt;Power Automate는 파이썬 스크립트를 바로 작성해서 실행할 수 있다. 하지만, Power Automate가 지원하는 파이썬 버전이 3.4로 제한되어 있어 pandas와 같은 필수 package를 사용할 수 없다. &lt;span style=&quot;background-color: #f6e199;&quot;&gt;pandas의 &lt;/span&gt;&lt;span&gt;&lt;span style=&quot;background-color: #f6e199;&quot;&gt;&lt;b&gt;요구사항은 &lt;/b&gt;Python &amp;gt;=3.9&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;720&quot; data-origin-height=&quot;407&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bG456L/btsNULhnmab/vKzmPlXZ1293IyxuECxsy1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bG456L/btsNULhnmab/vKzmPlXZ1293IyxuECxsy1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bG456L/btsNULhnmab/vKzmPlXZ1293IyxuECxsy1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbG456L%2FbtsNULhnmab%2FvKzmPlXZ1293IyxuECxsy1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;720&quot; height=&quot;407&quot; data-origin-width=&quot;720&quot; data-origin-height=&quot;407&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 제약사항을 해결하기 위한 방안은 아래와 같다.&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li data-end=&quot;1343&quot; data-start=&quot;1311&quot;&gt;&lt;b&gt;Jupyter Notebook에서 파이썬 스크립트 작성&lt;/b&gt;&lt;/li&gt;
&lt;li data-end=&quot;1378&quot; data-start=&quot;1344&quot;&gt;&lt;b&gt;PowerShell로 외부 Python 실행 자동화&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;616&quot; data-origin-height=&quot;521&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/LeXaJ/btsNWRGNJu9/lSyPk6565gvgBgCHPjK9F1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/LeXaJ/btsNWRGNJu9/lSyPk6565gvgBgCHPjK9F1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/LeXaJ/btsNWRGNJu9/lSyPk6565gvgBgCHPjK9F1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FLeXaJ%2FbtsNWRGNJu9%2FlSyPk6565gvgBgCHPjK9F1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;616&quot; height=&quot;521&quot; data-origin-width=&quot;616&quot; data-origin-height=&quot;521&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;3179&quot; data-end=&quot;3203&quot;&gt;앞으로 다룰 내용&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;3205&quot; data-end=&quot;3252&quot;&gt;다음 글들에서는 아래 주제들을 구체적으로 다룰 예정이다:&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot; data-start=&quot;3254&quot; data-end=&quot;3375&quot;&gt;
&lt;li data-start=&quot;3254&quot; data-end=&quot;3287&quot;&gt;feedparser -&amp;nbsp; XML에서 구글 url 및 메타 데이터 (발행일시, 기사 제목 등) 추출&lt;/li&gt;
&lt;li data-start=&quot;3254&quot; data-end=&quot;3287&quot;&gt;googlenewsdecoder - 구글 뉴스 url을 실제 언론 기사 url로 변환&lt;/li&gt;
&lt;li data-start=&quot;3254&quot; data-end=&quot;3287&quot;&gt;Trafilatura - 기사 본문 추출&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 data-ke-size=&quot;size26&quot; data-start=&quot;3377&quot; data-end=&quot;3383&quot;&gt;마치며&lt;/h2&gt;
&lt;p data-ke-size=&quot;size16&quot; data-start=&quot;3385&quot; data-end=&quot;3501&quot;&gt;시간이 부족한 직장인은 제한된 시간 내에서 효율적인 정보 수집과 분석이 요구되며, 이를 위한 자동화는 필수적이다. 다음 글에서는 &lt;b&gt;feedparser를 활용해 구글 뉴스 RSS에서 원하는 기사를 자동 수집하는 방법&lt;/b&gt;을 실전 예시와 함께 소개한다.&lt;/p&gt;</description>
      <category>feedparser</category>
      <category>googlenewsdecoder</category>
      <category>trafilatura</category>
      <category>자동화</category>
      <author>catalystmind</author>
      <guid isPermaLink="true">https://catalystmind.tistory.com/12</guid>
      <comments>https://catalystmind.tistory.com/12#entry12comment</comments>
      <pubDate>Tue, 13 May 2025 21:52:32 +0900</pubDate>
    </item>
  </channel>
</rss>