
    j              	       (	   % S r SSKrSSKrSSKrSSKrSSKrSSKrSSKJrJr  SSK	J
r
  SSKJr  SSKJr  SSKJr  SSKrSSKrSSKJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJr  SSKJ r   \
" \!5      RD                  r#\#S-  r$S r%\%" 5       r&\#\&RO                  SS5      -  r(\#S-  r)\#S-  r*\#\&RO                  SS5      -  r+\#\&RO                  SS5      -  r,\#\&RO                  SS5      -  r-\(R]                  SS9  \)R]                  SS9  \*R]                  SS9  \&RO                  SS5      r/\&RO                  SS5      r0\&RO                  S S5      r1\&RO                  S!S5      r2\" \&RO                  S"5      5      r3\3S#   r4\Rj                  " \Rl                  S$\Rn                  " \*S%-  S&S'9\Rp                  " 5       /S(9  \Rr                  " \:5      r;\" S)S*9r<\<R{                  \S+/S+/S+/S,9  \R|                  " 5       r?Sq@\\R                     \BS-'    " S. S/\ 5      rC " S0 S1\ 5      rDS2\E4S3 jrFS4\E4S5 jrGS2\H4S6 jrIS7 rJS2\E4S8 jrKS4\E4S9 jrLS2\M4S: jrNS2\M4S; jrOS<\PS2\\E   4S= jrQS<\PS>\E4S? jrRS<\P4S@ jrSS<\P4SA jrTS<\P4SB jrUSC\M\P   4SD jrVS<\PS2\P4SE jrWS<\PS2\
4SF jrXS~SH\ESI\PSJ\YS2\E4SK jjrZSL\M\E   S2\M\E   4SM jr[SL\M\E   S2\H4SN jr\S<\PS2\E4SO jr]S<\PS4\E4SP jr^SQ r_ " SR SS5      r`S<\PS2\4ST jraS<\PS2\b\E\M4   4SU jrcS<\PS2\b\E\M4   4SV jrdS<\PS2\P4SW jreSX\PS2\P4SY jrfSZ\ESL\MS2\P4S[ jrgS\ rhS] ri\<R                  S^5      S_ 5       rk\<RO                  S`5      Sa 5       rl\<RO                  Sb5      Sc 5       rm\<RO                  Sd5      Se 5       rn\<R                  Sb5      Sf\C4Sg j5       rp\<R                  Sh5      Si\P4Sj j5       rr\<R                  Sk5      Sf\D4Sl j5       rt\<RO                  Sh5      Si\P4Sm j5       ru\<RO                  Sn5      Si\P4So j5       rv\<RO                  Sp5      Si\P4Sq j5       rw\<R                  Sr5      Si\P4Ss j5       rx\<R                  St5      Su\4Sv j5       ry\<RO                  Sw5      Sx 5       rz\:Sy:X  a  \R                  " SzS{S|SGS}9  gg)z
Book Scraper Web App - FastAPI Backend
Manages a queue of books to scrape from wikicv.net,
respects daily chapter limits, and auto-schedules at 7AM daily.
    N)datetimedate)Path)Optional)unquote)MongoClient)BeautifulSoup)FastAPIHTTPExceptionBackgroundTasks)CORSMiddleware)FileResponse)StaticFiles)	BaseModelzconfig.jsonc                      [         R                  5       (       a0  [        [         S5       n [        R                  " U 5      sS S S 5        $ 0 $ ! , (       d  f       0 $ = f)Nr)CONFIG_FILEexistsopenjsonloadfs    /home/ubuntu/wiki/main.pyload_configr   "   sC    +s#q99Q< $#I $#Is   A
ABOOK_CACHE_DIR
book_cacheepubslogsBOOK_QUEUE_FILEzbooks_queue.jsonBOOK_DOWNLOADED_FILEzdownloaded_books.jsonDAILY_LIMIT_FILEzdaily_limit.jsonT)exist_okMAX_CHAPTERS_PER_DAY2   WAIT_TIME_PER_CHAPTER   EXTRA_WAIT_AFTER_PAGE_LOADBROWSER_HEADLESS	MONGO_URIwikiz'%(asctime)s [%(levelname)s] %(message)szscraper.logutf-8encoding)levelformathandlerszBook Scraper API)title*)allow_originsallow_methodsallow_headers_scheduler_taskc                        \ rS rSr% \\S'   Srg)AddBookRequestY   url N)__name__
__module____qualname____firstlineno__str__annotations____static_attributes__r<       r   r9   r9   Y   s    	HrD   r9   c                   &    \ rS rSr% \\   \S'   Srg)ReorderRequest\   ordered_urlsr<   N)r=   r>   r?   r@   listrA   rB   rC   r<   rD   r   rF   rF   \   s    s)rD   rF   returnc                     [         R                  5       (       ai   [        [         S5       n [        R                  " U 5      nS S S 5        WR                  S5      [        R                  " 5       R                  5       :X  a  U$  [        R                  " 5       R                  5       S[        R                  " 5       R                  5       S.n[        U5        U$ ! , (       d  f       N= f! [         a     Nrf = f)Nr   r   r   )r   count
last_reset)r"   r   r   r   r   getr   today	isoformat	Exceptionr   nowsave_daily_limit)r   dfreshs      r   load_daily_limitrV   b   s      	&,IIaL -uuV}

 6 6 88 9 ZZ\++-IaIaIcdEUL -,  		s(   C) C?C) 
C&"C) )
C65C6datac                     [        [        S5       n[        R                  " XSS9  S S S 5        g ! , (       d  f       g = f)Nwr'   )indent)r   r"   r   dumprW   r   s     r   rS   rS   o   s)    		$		$!$ 
%	$	$s   0
>c                  B    [        5       n [        S[        U S   -
  5      $ )Nr   rL   )rV   maxr$   rT   s    r   daily_remainingr`   s   s!    Aq&7344rD   c                  H    [        5       n U S==   S-  ss'   [        U 5        g )NrL      )rV   rS   r_   s    r   daily_incrementrc   w   s    AgJ!OJQrD   c                      [         R                  5       (       a/  [        [         SSS9 n [        R                  " U 5      sS S S 5        $ S/ 0$ ! , (       d  f       S/ 0$ = fNr   r,   r-   books)
QUEUE_FILEr   r   r   r   r   s    r   
load_queuerh      sO    *cG499Q< 54R= 54R=   A
Ac                     [        [        SSS9 n[        R                  " XSSS9  S S S 5        g ! , (       d  f       g = fNrY   r,   r-   r'   FrZ   ensure_ascii)r   rg   r   r[   r\   s     r   
save_queuern      s,    	j#	0A		$!%8 
1	0	0s   0
>c                  6    [        5       R                  S/ 5      $ )Nrf   )rh   rN   r<   rD   r   queue_booksrp      s    <GR((rD   c                      [         R                  5       (       a/  [        [         SSS9 n [        R                  " U 5      sS S S 5        $ S/ 0$ ! , (       d  f       S/ 0$ = fre   )DOWNLOADED_FILEr   r   r   r   r   s    r   downloaded_booksrs      sO    /39Q99Q< :9R= :9R=ri   r;   c                 @    [        5        H  nUS   U :X  d  M  Us  $    g )Nbook_urlrp   )r;   bs     r   get_book_by_urlrx      s$    ]Z=CH  rD   updatesc                 ~    [        5       nUS    H  nUS   U :X  d  M  UR                  U5          O   [        U5        g Nrf   ru   )rh   updatern   )r;   ry   qrw   s       r   update_bookr~      s;    AwZZ=CHHW  qMrD   c                    ^  [        5       n[        U 4S jUS    5       5      (       a  [        S5      eUS   R                  T S[        R
                  " 5       R                  5       S S S SSS S S.
5        [        U5        g )Nc              3   2   >#    U  H  oS    T:H  v   M     g7fru   Nr<   .0rw   r;   s     r   	<genexpr>$add_book_to_queue.<locals>.<genexpr>   s     
4AZ=C   rf   zBook already in queuequeuedr   )
ru   statusadded_atr2   author	cover_urltotal_chaptersdownloaded_chapters	epub_patherror)rh   any
ValueErrorappendr   rR   rP   rn   )r;   r}   s   ` r   add_book_to_queuer      sx    A

47
444011gJLLN,,.   qMrD   c                 ~    [        5       nUS    Vs/ s H  o"S   U :w  d  M  UPM     snUS'   [        U5        g s  snf r{   rh   rn   )r;   r}   rw   s      r   remove_book_from_queuer      s;    AwZ@ZZ=C+?!Z@AgJqM As   ::c                   ^  [        T 5      nU(       d  [        S5      e[        T 5        [        5       n[	        U[
        5      (       d  S/ 0nSU;  a  / US'   [        U 4S jUS    5       5      (       d  US   R                  U5        [        [        SSS9 n[        R                  " X#SSS	9  S
S
S
5        g
! , (       d  f       g
= f)z#Move book from queue to downloaded.zBook not found in queuerf   c              3   2   >#    U  H  oS    T:H  v   M     g7fr   r<   r   s     r   r   archive_book.<locals>.<genexpr>   s     A-@}#-@r   rY   r,   r-   r'   Frl   N)rx   r   r   rs   
isinstancedictr   r   r   rr   r   r[   )r;   book
downloadedr   s   `   r   archive_bookr      s    3D233 3 "#Jj$''r]
j  
7 AZ-@AAA7""4(	osW	5		*> 
6	5	5s   B88
CrH   c                     [        5       nUS    Vs0 s H  o"S   U_M
     nnU  Vs/ s H  oDU;   d  M
  X4   PM     nnUS    Vs/ s H  o"S   U ;  d  M  UPM     nnXV-   US'   [        U5        g s  snf s  snf s  snf r{   r   )rH   r}   rw   url_mapu	reorderedextrass          r   reorder_queuer      s    A)*74A}aG4%1B\'\\IB7IA}L'HaFI#AgJqM 5BIs   A0	A5A5A:A:c                 H    U R                  S5      R                  S5      S   $ )N/)rstripsplitr;   s    r   slug_from_urlr      s     ::c?  %b))rD   c                 ,    [         [        U 5       S3-  $ )Nz.json)r   r   r   s    r   
cache_pathr      s    }S12%888rD   Fchapterhtmlr   c                 P    U R                  SS5      U R                  SS5      UUS.$ )Nr2    r;   )r2   r;   r   r   rN   )r   r   r   s      r   _chapter_recordr      s.    Wb){{5"% 	 rD   chaptersc                     U  Vs/ s H4  oR                  S5      (       d  M  UR                  S5      (       d  M2  UPM6     sn$ s  snf )Nr   r   r   )r   chs     r   _downloaded_chaptersr      s/    !M2VVL%9BbffVnBMMMs   AAAc                 &    [        S U  5       5      $ )Nc              3   T   #    U  H  oR                  S 5      (       d  M  Sv   M      g7f)r   rb   Nr   r   r   s     r   r   $_downloaded_count.<locals>.<genexpr>   s     <8Rvvl';qq8s   (	()sum)r   s    r   _downloaded_countr      s    <8<<<rD   c                     [        U 5      nUR                  5       (       a+  [        USSS9 n[        R                  " U5      sS S S 5        $ U 0 / S.$ ! , (       d  f       N= f)Nr   r,   r-   )r;   metadatar   )r   r   r   r   r   )r;   pr   s      r   load_book_cacher      sU    3Axxzz!S7+q99Q< ,+   ,+s   A
A c                     [        U 5      n[        USSS9 n[        R                  " XSSS9  S S S 5        g ! , (       d  f       g = frk   )r   r   r   r[   )r;   rW   r   r   s       r   save_book_cacher      s5    3A	aw	'1		$!%8 
(	'	's	   7
Ac                    [         R                  SU R                  SS5       35        [        R                  R                  SU S   0SU 0SS9nUR                  (       a  UR                  nO'[        R                  R                  SU S   05      S   n[        R                  R                  SU0SU0SS9  [         R                  S	U S
    SU 35        g )NzBackup book: ru   Unknownz$setT)upsert_idbook_idzUpdated backup for book r2   : )	loginforN   db	book_info
update_oneupserted_idfind_onebook_chapters)r   
cache_dataresultr   s       r   upsert_book_recordr     s    HH}TXXj)<=>?\\$$j$z2B%Cfd^\`$aF$$,,''T*5E(FGNG 4vz6JSWXHH'Wb	BCrD   c                   8    \ rS rSrSr/ SQr\S\4S j5       rSr	g)ContentCleaneri  z6Removes ads and unwanted elements from chapter content)   ·u   dkạhsdsadjdáu   oiewơieu   ✧⋄⋆⋅⋆⋄✧⋄⋆⋅⋆⋄✧ ฅ/ᐠ｡ꞈ｡ᐟ\ฅ Convert by Haruko ฅ/ᐠ｡ꞈ｡ᐟ\ฅ ✧⋄⋆⋅⋆⋄✧⋄⋆⋅⋆⋄✧u*   ☀Truyện được đăng bởi Reine☀rJ   c                    Uc  gUR                  S5      nSnU Hy  nU(       d  M  UR                  SS9nU(       d  M$  SR                  S U R                   5       5      n[        R
                  " US[        U5      5      nU(       d  Mp  USU S3-  nM{     UR                  5         [        US	5      n[        UR                  5       H  nUR                  U5        M     U$ )
zGExtract clean paragraphs and remove ads, scripts, and unwanted elementsr   r   Tstrip|c              3   N   #    U  H  n[         R                  " U5      v   M     g 7fN)reescape)r   ss     r   r   'ContentCleaner.clean.<locals>.<genexpr>.  s     J5Iryy||5Is   #%z<p>z</p>html.parser)find_allget_textjoinUNWANTED_STRINGSr   subrA   clearr	   rI   contentsr   )	clscontent_div
paragraphs
clean_htmlr   	para_textpatterncleaned_soupchilds	            r   cleanContentCleaner.clean  s      ))#.

A


.I hhJS5I5IJJGwC	N;I yI;d33
   	$Z?,//0Eu% 1 rD   r<   N)
r=   r>   r?   r@   __doc__r   classmethodrA   r   rC   r<   rD   r   r   r     s(    @ 3  rD   r   c                   ^ SSK Jn  U" 5        nUR                  R                  [        S9nUR                  SS9nUR                  5       nUR                  U SSS9  [        R                  " [        5        UR                  5       m[        U4S	 jS
 5       5      (       a  [        S5      eUR                  5         SSS5        [        TS5      $ ! , (       d  f       N= f)z'Fetch with Playwright in headless mode.r   sync_playwrightheadless<Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
user_agentdomcontentloaded 
wait_untiltimeoutc              3   ,   >#    U  H	  oT;   v   M     g 7fr   r<   r   keywordr   s     r   r   #_fetch_page_sync.<locals>.<genexpr>I  s       y  /x7$  /x   u"   Đăng nhập để xem nội dungu   Đã hết lượt truy cậpu>   Đăng nhập và xác minh email có thêm lượt truy cậpzAccess limit hitNr   )playwright.sync_apir   chromiumlaunchr)   new_contextnew_pagegototimesleepr(   contentr   rQ   closer	   )r;   r   r   browsercontextpager   s         @r   _fetch_page_syncr  =  s     4		a**##-=#>%%1o%p!		#"4f	E

-.||~  y  /x  y  y  y.// 
 }-- 
	s   B-C
Cc           
         [        U 5      nUR                  SS S9nU(       a  UR                  SS9OSnSnUR                  S5       HW  nS	UR                  5       R	                  5       ;   d  M'  UR                  5       R                  S
5      S   R                  5       n  O   SnUR                  S5      nU(       aN  UR                  S5      nU(       a6  UR                  S5      (       d   SUR                  S5      (       a  UOSU-   -   nU UUUUR                  S5      (       a  [        UR                  S5      5      OSUR                  S5      (       a  [        UR                  S5      5      OSS.n/ n	UR                  S5      n
U
(       aY  U
R                  SSSS9 HD  nUS   nUR                  S5      (       a  SU-   nU	R                  UR                  SS9US.5        MF     X4$ )z2Extract metadata and chapter list from intro page.h2c                     U =(       a    SU ;   $ Nzfont-size: 1.7remr<   xs    r   <lambda>&_get_intro_data_sync.<locals>.<lambda>T  s    0N6IQ6N0NrD   styleTr   r   
   Không rõr   
   tác giả:r   N0.cover-wrapper img, .book-info img[src*='cover']srchttphttps://wikicv.netr   .cover-infor   
.book-descru   r2   r   r   
cover_infodescriptiondiv.volume-listatruncateclass_hrefr2  r2   r;   )r  findr   r   lowerr   r   
select_onerN   
startswithrA   r   )r;   soup	title_tag
book_titler   r   r   imgr   r   v_listr.  r2  s                r   _get_intro_data_syncr=  O  s   C D 		$&N	OI3<##$#/)J F]]31::<--//ZZ\'',R0668F   I
//L
MC
GGEN	Y11&99,Y=Q=QRU=V=V	\_bk\klI =A__]=[=[c$//-89ac=A__\=Z=Zs4??<89`bH H__./FZdCAV9Ds##+d2OOajjtj&<TJK	 D rD   c                   ^ SSK Jn  U" 5        nUR                  R                  [        S9nUR                  SS9nUR                  5       m TR                  U SSS9  [        R                  " [        5        TR                  5       n[        US	5      nUR                  S
S S9nU(       a  UR                  SS9OSnSn	UR                  S5       HW  n
SU
R                  5       R!                  5       ;   d  M'  U
R                  5       R#                  S5      S   R%                  5       n	  O   SnUR'                  S5      nU(       aN  UR)                  S5      nU(       a6  UR+                  S5      (       d   SUR+                  S5      (       a  UOSU-   -   nU UU	UUR'                  S5      (       a  [-        UR'                  S5      5      OSUR'                  S5      (       a  [-        UR'                  S5      5      OSS.nS[.        4U4S  jjnUR1                  S!5      n[3        5       n/ nU Hd  nUR                  SS9nUR)                  S"5      nUR5                  5       (       d  M:  UU;   a  MB  UR7                  U5        UR9                  U5        Mf     U" 5       nU(       a  [;        U5      S#::  a#  [<        R?                  S$[;        U5       S%35        O[<        R?                  S&[;        U5       S'35        US#S  H  nUR                  SS9nUR)                  S"5      n[<        R?                  S(U S)U S*35        TRA                  S+5        TRC                  S,U S-3SS.9  [        R                  " [        5        U" 5       n[<        R?                  S/U S0[;        U5       S135        URE                  U5        M     URG                  5          SSS5        [3        5       n/ nW H3  nUS2   U;  d  M  UR7                  US2   5        UR9                  U5        M5     [<        R?                  S3[;        U5       35        WU4$ ! URG                  5         f = f! , (       d  f       N= f)4zGExtract metadata and chapter list from intro page, handling pagination.r   r   r   r   r   r   r   r   r   r  c                     U =(       a    SU ;   $ r  r<   r  s    r   r  (_get_updated_data_sync.<locals>.<lambda>  s    8V>QUV>V8VrD   r  Tr   r   r!  r   r"  r#  r   Nr$  r%  r&  r'  r   r(  r   r)  r*  rJ   c                  &  > TR                  5       n [        U S5      nUR                  S5      n/ nU(       aY  UR                  SSSS9 HD  nUS   nUR	                  S5      (       a  S	U-   nUR                  UR                  SS
9US.5        MF     U$ )Nr   r-  r.  r/  Tr0  r2  r   r'  r   r3  )r  r	   r6  r   r7  r   r   )current_htmlcurrent_soupr<  foundr.  r2  r  s         r   "extract_chapters_from_current_pageB_get_updated_data_sync.<locals>.extract_chapters_from_current_page  s    #||~,\=I%001BC#__S$_O y??3//#7$#>Dqzzz/Ed%ST	 P
 rD   z/ul.pagination li a[data-action='loadBookIndex']z
data-startrb   z  No pagination detected. z chapters found.z  Found z pagination page(s)z  Loading pagination page z (start=z)...z
                    () => {
                        document.querySelectorAll(
                            '.fc-consent-root, .fc-dialog-overlay'
                        ).forEach(el => el.remove());
                    }
                    zul.pagination li a[data-start='z'][data-action='loadBookIndex']r  z  Page r   z	 chaptersr;   z#  Total unique chapters collected: )$r	  r   r
  r  r)   r  r  r  r  r  r(   r  r	   r4  r   r   r5  r   r   r6  rN   r7  rA   rI   selectsetisdigitaddr   lenr   r   evaluateclickextendr  )r;   r   r   r  r  r   r8  r9  r:  r   p_tagr   r;  r   rE  pagination_itemsseen_starts
page_linksr.  	page_text
data_startr   	page_linkpage_numpage_chaptersseenunique_chaptersr   r  s                               @r   _get_updated_data_syncr[  {  s   3		a**##-=#>%%U & W!f	IIc&8&IIJJ12<<>D }5D 		$.V	WI;D++$+7)J!Fs+5>>#3#9#9#;;"^^-33C8<BBDF ,
 I//"TUCGGEN	Y%9%9&%A%A 4%.%9%9#%>%>	C)O!UI !$'$!*IMYfIgIg#doom&D"EmoIMYeIfIf3t|'D#ElnH   ${{E  %KJ%JJTJ2	UU<0
 ((**,
+!!!$ & :;HZA!55c(m_DTUV8C
O#44GHI!+ABI(111=H!*|!<JHH9(8J<W[\] MM #  JJ=j\Ihi$*   JJ9:$F$HMHHwxj3}3E2FiPQOOM2/ "02 MMOW 
\ 5DOe9D HHRY""2& 
 HH233G2HIJ_$$ MMOW 
	s+   =QB&P:7K P:7Q:QQ
Qc                     [        U 5      nUR                  S5      n[        R                  U5        U(       a  [	        U5      $ S$ )z/Download a chapter and return its HTML content.z#bookContentBodyr   )r  r6  r   r   rA   )r;   r8  bodys      r   _fetch_chapter_syncr^    s:    C D??-.D3t9$"$rD   textc                 N    [         R                  " SSU 5      R                  5       $ )Nz[\\/:*?"<>|]r   )r   r   r   )r_  s    r   _make_safe_filenamera    s    66/2t,2244rD   r   c                     SSK Jn  U R                  S0 5      nU S   nUR                  5       nUR	                  S[        [        R                  " 5       5       35        UR                  UR                  SS5      5        UR                  S5        UR                  UR                  S	S
5      5        SnUR                  S5      (       aE   [        R                  " US   SS9nUR                  S:X  a  UR                  SUR                  5        Sn	UR#                  SSSU	S9nUR%                  U5        UR'                  SSSS9n
SUR                  SS5       SU SU SUR                  SS5       S UR                  S!S5       S"3U
l        U
R%                  U5        UR%                  U
5        / nS#U
/n[)        U5       H  u  pUR                  S$5      (       d  M  UR'                  US   S%US&-   S' S(3SS9nS)US    S*US$    S+3Ul        UR%                  U5        UR%                  U5        UR+                  U5        UR+                  U5        M     U
/U-   Ul        Xl        UR%                  UR1                  5       5        UR%                  UR3                  5       5        [5        UR                  SS,5      5      n[7        [8        U S-3-  5      nUR;                  UU0 5        [        R=                  S.U 35        U$ ! [         a#  n[        R!                  SU 35         SnAGNSnAff = f! [         a  n[        R?                  S/U 35        e SnAff = f)0z"Build EPUB and return output path.r   )epubr   ru   zwiki-r2   r   vir   r!  Nr      rG     z	cover.jpgzCover download failed: a  
            body { font-family: Georgia, serif; line-height: 1.8; margin: 2em; color: #222; }
            h1, h2 { font-size: 1.4em; margin-top: 2em; color: #444; }
            p { margin: 0.6em 0; text-indent: 1.5em; }
            .bookContentBody { margin: 1em 0; }
        
style_mainzstyle/main.cssztext/css)uid	file_name
media_typer  u
   Bìa sáchzcover_page.xhtml)r2   ri  langz<html><head><link rel="stylesheet" href="style/main.css"/></head>
        <body>
          <div style="text-align:center;margin:2em 0;">
            <h1>r   zq</h1>
            <img src="cover.jpg" alt="Cover" style="max-width:100%;height:auto;"/>
            <p><a href="z">z$</a></p>
          </div>
          r+  z
          r,  z
        </body></html>navr   chap_rb   04dz.xhtmlzK<html><head><link rel='stylesheet' href='style/main.css'/></head><body><h1>z</h1>z</body></html>r   z.epubzEPUB saved: zEPUB build error: ) ebooklibrc  rN   EpubBookset_identifierintr  	set_titleset_language
add_authorrequestsstatus_code	set_coverr  rQ   r   warningEpubItemadd_itemEpubHtml	enumerater   tocspineEpubNcxEpubNavra  rA   EPUB_DIR
write_epubr   r   )r   r   epub_libr   ru   rw   css_itemrespecss_content
cover_pageepub_chaptersr  idxr   ec	safe_nameout_paths                     r   _build_epub_syncr    sf   N-88J+
#	5TYY[!1 234	HLL)45	t	X\\(L9: <<$$;||H[$92F##s*KKT\\: $$(8!; % 
 	


8 &&\EW^b&c
" gb)* +!
"XJ /<<R(
) *<<b)
* +	
 	H%	

:
# *GC66&>>""k!#a%F3 # B
 ggijqgrfssxy{  }C  zD  yE  ES  TBJKK!JJrN  $LL + },	

8##%&	

8##%&'Wf(EF	xYKu"556Ha,<z*+q  ;5aS9::;r  		&qc*+sD   CL0 AL  G0L0  
L-
L("L0 (L--L0 0
M:MMc                  	  ^#    [         R                  5       (       a  [        R                  S5        g[          ISh  vN   [        R                  S5        [        R
                  " 5       n [        5       nU Vs/ s H  o"S   S;  d  M  UPM     nnU GH  nUS   n[        5       nUS::  a;  [        R                  S[        U5       35        US   S	:X  a  [        USS
05          GO@[        R                  SU SU S35        [        US	SS.5         [        U5      nUR                  S5      (       d  [        R                  S5        U R                  S[        U5      I Sh  vN u  pXS'   U	 V
s/ s H  n
[        U
5      PM     sn
US'   [        XW5        [        UUR                  S5      UR                  S5      UR                  S5      [        U	5      S.5        US   n[!        U5      nU V
s/ s H  oR                  S5      (       a  M  U
PM     nn
[        R                  S[        U5       SU S[        U5       35        U GHu  n
[        5       S::  a3  [        R                  S5        [        US
[!        US   5      S.5          GO5U
R                  S5      =(       d    SnU
R                  S5      nU(       d  [        R#                  SU
 35        M  [        R                  SU 35        U R                  S[$        U5      I Sh  vN m['        U4S  jS! 5       5      (       a8  S"n[        R)                  S#U 35        [        US$U[!        US   5      S%.5          O`TU
S&'   S'U
S'   [        XW5        [+        5         [        US([!        US   5      05        [        R,                  " [.        5      I Sh  vN   GMx     [!        US   5      nU(       a.  [1        S) US    5       5      n[        UU(       a  S*OS
US.5        O[        US(S05         [7        UW5        GM     [        R                  S-5        SSS5      ISh  vN   g GNs  snf  GNs  sn
f s  sn
f  GNU N! [2         a=  n[        R)                  S+U S,U 35        [        US$[5        U5      S.5         SnANSnAff = f Ni! , ISh  vN  (       d  f       g= f7f).z|
Process books in queue order, respecting daily chapter limit.
Saves progress after each chapter so it can resume tomorrow.
z+Scraping session already running, skipping.Nz!=== Starting scraping session ===r   )done	cancelledru   r   z0Daily limit reached. Stopping. Books remaining: in_progresspausedzProcessing book: z (remaining limit: ))r   r   r   z  Fetching intro page...r   r2   r   r   )r2   r   r   r   r   z	  Total: z | Downloaded: z | Pending: z+  Daily limit hit mid-book. Progress saved.)r   r   z(untitled chapter)r;   z$  Skipping malformed chapter entry: z  Downloading: c              3   ,   >#    U  H	  oT;   v   M     g 7fr   r<   r  s     r   r   'run_scraping_session.<locals>.<genexpr>  s       E  ;Dwd?  ;Dr  r  z#Book requires login to view contentz  r   )r   r   r   r   Tr   c              3   B   #    U  H  oR                  S 5      v   M     g7fr   Nr   r   s     r   r   r    s     %WEVrff\&:&:EV   r  zError processing r   z!=== Scraping session complete ===)_scraping_locklockedr   r   asyncioget_event_looprp   r`   rL  r~   r   rN   run_in_executorr=  r   r   r   ry  r^  r   r   rc   r  r&   allrQ   rA   r   )looprf   rw   pendingr   r;   	remainingcacher   chapter_listr   r   downloaded_countpending_chaptersr2   chapter_url	error_msgis_completer  r   s                      @r   run_scraping_sessionr  V  s]    
 >?~45%%'#Pe{:O'O1ePDz"C')IA~KCPWL>Z[>]2h%9:HH(-@1MNEFNG', yy,,HH79373G3G2C4 .*H )1*%GS(T|)<|(TE*%#C/!)g!6"*,,x"8%-\\+%>*-l*;	&  !,#4X#> 19#V2AUB #V9S]O?CSBTT`adeuav`wxy*B&(A-#NP#C&.3DU:EV3W*  FF7OC/CE"$&&-K&&J2$$OP HHug67!%!5!5d<OQ\!]]D   E  ;D  E  E  E$I			Byk"23#C&-%.3DU:EV3W* 
 !%BvJ'+B|$#C/#%&;=NuU_O`=a%bc!--(=>>>I +N $5U:5F#G #"%%WU:EV%W"WK,7&X/?& 
  &;Q%?@ tU+{ ~ 	45M ~~
 Q,. )U $W( ^( ?  G		-cU"QC89CGc!f!EFFGA ~~~s   :SQS9R4:Q
QBR4AQ($Q%Q(3QA,Q(4QQC7Q(Q#
B,Q(;Q&
<AQ(R4Q('R4=SR2	SR4Q(Q(&Q((
R/23R*%R4*R//R42S4S:R=;SSc                    #    [         R                  S5         [        R                  " 5       n U R	                  SSSSS9nX:  a  UR	                  UR
                  S-   S9nX-
  R                  5       n[         R                  SUR                  5        SUS	-  S
 S35        [        R                  " U5      I Sh  vN   [        5       I Sh  vN   M   N N7f)z?Runs indefinitely, triggering a scrape session at 7AM each day.zScheduler started.   r   )hourminutesecondmicrosecondrb   )dayzNext scheduled run at z (in i  z.1fzh)N)r   r   r   rR   replacer  total_secondsrP   r  r  r  )rR   next_runwait_secondss      r   scheduler_loopr    s     HH!"
lln;;Aaq;I?''HLL1,<'=H 557)(*<*<*>)?u\RVEVWZD[[]^_mmL)))"$$$  	*$s$   B8C:C;CCCCstartupc                  r   #    [         R                  " [        5       5      q[        R                  S5        g 7f)NzApp started. Scheduler running.)r  create_taskr  r7   r   r   r<   rD   r   r  r    s'      )).*:;OHH./s   57z/api/statusc            
      H   [        5       n [        5       nUS   US   [        [        5       S.[	        U 5      [        S U  5       5      [        S U  5       5      [        S U  5       5      [        S U  5       5      [        S U  5       5      S	.[        R                  5       S
.$ )z1Overall status: queue summary + daily limit info.r   rL   r   usedlimitr  c              3   :   #    U  H  oS    S:X  d  M  Sv   M     g7f)r   r   rb   Nr<   r   rw   s     r   r   get_status.<locals>.<genexpr>       FUkX.E!!U   	c              3   :   #    U  H  oS    S:X  d  M  Sv   M     g7f)r   r  rb   Nr<   r  s     r   r   r    s     P%QX;-3Oqq%r  c              3   :   #    U  H  oS    S:X  d  M  Sv   M     g7f)r   r  rb   Nr<   r  s     r   r   r    r  r  c              3   :   #    U  H  oS    S:X  d  M  Sv   M     g7f)r   r  rb   Nr<   r  s     r   r   r    s     B5ahK6,A5r  c              3   :   #    U  H  oS    S:X  d  M  Sv   M     g7f)r   r   rb   Nr<   r  s     r   r   r    s     DEqx[G-CEr  )totalr   r  r  r  r   )dailyqueue_summaryis_scraping)rp   rV   r$   r`   rL  r   r  r  )rf   dls     r   
get_statusr    s     ME		B vJwK)(*	
 ZFUFFP%PPFUFFB5BBDEDD
 &,,. rD   z
/api/booksc                      S[        5       0$ )z#Return full book queue with status.rf   rv   r<   rD   r   	get_booksr    s     []##rD   z/api/downloaded/booksc                  6    [        5       n [        U S SS9n SU 0$ )z Return list of downloaded books.c                 &    U R                  SS5      $ )Nr   r   r   r  s    r   r  &get_downloaded_books.<locals>.<lambda>  s    aeeJ+rD   T)keyreverserf   )rs   sorted)rf   s    r   get_downloaded_booksr    s.     E +E UrD   reqc                     U R                   R                  5       nUR                  S5      (       d  [        SS5      e [	        U5        SUS.$ ! [
         a  n[        S[        U5      5      eSnAff = f)z%Add a book URL to the download queue.r&    zInvalid URLi  NT)okr;   )r;   r   r7  r   r   r   rA   )r  r;   r  s      r   add_bookr    sl     ''--/C>>&!!C//)# s##  )CQ(()s   A 
A2A--A2z/api/books/{slug}slugc                    ^  [        5       n[        U 4S jU 5       S5      nU(       d  [        SS5      e[        US   5        SS0$ )zRemove a book from the queue.c              3   b   >#    U  H$  n[        [        US    5      5      T:X  d  M   Uv   M&     g7fr   r   r   r   rw   r  s     r   r   cancel_book.<locals>.<genexpr>  s)     Uew}Qz]/K'LPT'T11e   /	/N  Book not foundru   r  T)rp   nextr   r   )r  rf   targets   `  r   cancel_bookr    sG     MEUeUW[\FC!1226*-.$<rD   z/api/books/reorderc                 4    [        U R                  5        SS0$ )z7Reorder the queue by providing an ordered list of URLs.r  T)r   rH   )r  s    r   reorder_booksr  "  s     #""#$<rD   c                    ^  [        5       n[        U 4S jU 5       S5      nU(       d  [        SS5      e[        US   5      n0 UEUR	                  S/ 5      UR	                  S0 5      S.E$ )	zBGet detailed info for a single book including cached chapter list.c              3   b   >#    U  H$  n[        [        US    5      5      T:X  d  M   Uv   M&     g7fr   r  r  s     r   r   "get_book_detail.<locals>.<genexpr>,  (     SEqW]1Z=-I%Jd%REr  Nr  r  ru   r   r   )r   r   )rp   r  r   r   rN   )r  rf   r   r  s   `   r   get_book_detailr  (  sr     MESESUYZDC!122D,-E
IIj"-IIj"- rD   z"/api/books/{slug}/refresh-chaptersc           	         ^  [        5       n[        U 4S jU 5       S5      nU(       d  [        SS5      e[        US   5      n [	        US   5      u  pEUR                  S/ 5       Vs0 s H  ofS   U_M
     nn/ nU HT  nUS   U;   a.  XvS      n	UR                  S5      U	S'   UR                  U	5        M:  UR                  [        U5      5        MV     XCS	'   XS'   [        US   U5        [        U5      n
[        S
 U 5       5      nX:X  a  U
S:  a  SO	US:  a  SOSn[        US   UR                  S5      UR                  S5      UR                  S5      [        U5      US.5        [        U Vs/ s H  ofR                  S5      (       a  M  UPM     sn5      nSS[        U5       SU S3[        U5      US.$ s  snf s  snf ! [         a;  n[        R                  SUS    SU 35        [        SS[        U5       35      eSnAff = f)zBRefresh the chapter list for a book by re-fetching the intro page.c              3   b   >#    U  H$  n[        [        US    5      5      T:X  d  M   Uv   M&     g7fr   r  r  s     r   r   #refresh_chapters.<locals>.<genexpr><  r  r  Nr  r  ru   r   r;   r2   r   c              3   B   #    U  H  oR                  S 5      v   M     g7fr  r   r   s     r   r   r  Y  s     E"--r  r   r  r  r   r   r   )r2   r   r   r   r   r   TzChapter list updated: z total, z new chapters)r  messager   new_chapterszError refreshing chapters for r   i  zFailed to refresh chapters: )rp   r  r   r   r[  rN   r   r   r   rL  r   r~   rQ   r   r   rA   )r  rf   r   r  r   r  r   old_chapters_mapr  existingr  r   r   	new_countr  s   `              r   refresh_chaptersr  8  s&    MESESUYZDC!122D,-E+J!7Z8H!I 5:IIj"4MN4MbuIrM4MNB%y,,+uI6$&FF7O!##H- ##OB$78  %j(jZ(%0L!EEE
%.519j[\n(bjD$\\'*ll8,!k2!,/'
 	 lOl&&:NlOP	/L0A/B(9+Ubc!,/%	
 	
= O: P  J		24
3C2DBqcJKC#?Ax!HIIJs=   $G (F>7C=G 4GG&G >
G 
H6HHz/api/books/{slug}/downloadc           
        ^  [        5       n[        U 4S jU 5       S5      nU(       d  [        SS5      e[        US   5      n[	        UR                  S/ 5      5      nU(       d  [        SS5      e[        US   UR                  S	0 5      S
.U5      n[        US   U[        UR                  S/ 5      5      S.5        [        U5      nUR                  5       (       d  [        SS5      e[        [        U5      SUR                  S9$ )zMBuild and download the EPUB file for a completed or partially completed book.c              3   b   >#    U  H$  n[        [        US    5      5      T:X  d  M   Uv   M&     g7fr   r  r  s     r   r    download_epub.<locals>.<genexpr>s  r  r  Nr  r  ru   r   r  z$No downloaded chapters to build EPUBr   )ru   r   )r   r   zEPUB file missingzapplication/epub+zip)rj  filename)rp   r  r   r   r   rN   r  r~   r   r   r   r   rA   name)r  rf   r   r  r   r   paths   `      r   download_epubr  o  s     MESESUYZDC!122D,-E.uyyR/HIC!GHH *%599Z3LMI Z 0:r1JK# 
 	?D;;==C!455D	.DtyyYYrD   z/api/books/{slug}/archivec                   ^  [        5       n[        U 4S jU 5       S5      nU(       d  [        SS5      e [        US   5        SSUR	                  ST 5       3S	.$ ! [
         a  n[        S
[        U5      5      eSnAff = f)z%Move a book from queue to downloaded.c              3   b   >#    U  H$  n[        [        US    5      5      T:X  d  M   Uv   M&     g7fr   r  r  s     r   r   (archive_book_endpoint.<locals>.<genexpr>  r  r  Nr  r  ru   TzBook archived: r2   r  r  r  )rp   r  r   r   rN   r   rA   )r  rf   r   r  s   `   r   archive_book_endpointr    s     MESESUYZDC!122)T*%&'49P8Q'RSS )CQ(()s   %A 
B &A;;B z/api/run-nowbackground_tasksc                 z   #    [         R                  5       (       a  SSS.$ U R                  [        5        SSS.$ 7f)z0Manually trigger a scraping session immediately.FzAlready runningr  TzScraping session started)r  r  add_taskr  )r  s    r   trigger_nowr    s=      (9::23#=>>s   9;z/api/daily-limitc                  H    [        5       n U S   U S   [        [        5       S.$ )Nr   rL   r  )rV   r$   r`   )r  s    r   get_daily_limitr
    s+    		B6
7%$&	 rD   __main__zmain:appz0.0.0.0i="  )hostportreload)NF)|r   r  r   loggingosr   r  r   r   pathlibr   typingr   urllib.parser   pymongor   rv  uvicornbs4r	   fastapir
   r   r   fastapi.middleware.corsr   fastapi.responsesr   fastapi.staticfilesr   pydanticr   __file__parentBASE_DIRr   r   _configrN   r   r  LOG_DIRrg   rr   r"   mkdirr$   r&   r(   r)   clientr   basicConfigINFOFileHandlerStreamHandler	getLoggerr=   r   appadd_middlewareLockr  r7   TaskrB   r9   rF   r   rV   rS   rr  r`   rc   rh   rn   rI   rp   rs   rA   rx   r~   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r  tupler=  r[  r^  ra  r  r  r  on_eventr  r  r  r  postr  deleter  putr  r  r  r  r  r  r
  runr<   rD   r   <module>r3     s      	 	  #         ; ; 2 * + 
 >  & -GKK(8,GGg
V
$57IJJ
W[[)?AXYYgkk*<>PQQ    d  #   t {{#92> $;Q? $[[)EqI ;;148 	W[[-	.F^   
,,4Gm3gF !
 &'   %%%	   *.',,' .
Y Y $ %4 %5 5D 9T 9)T )$  $ S 4 3 & 
?c ?.S	 *s *s *9C 9D 9T   RV N4: N$t* N=T
 =s =	 	 	9 9D 9	D* *X.# .- .$)c )eD$J&7 )Xy% y%dDj(9 y%x%S %S %5c 5c 5P4 P4 PC Pjo6h% i0 0  , $ $ 	 ! " ,	$. 	$ 	$  c  ! 	~  
 	#   	-.4J3 4J /4Jl 	%&Z Z 'Z4 
%&) ) ') .? ? ? 	  zKK
eD rD   