From 2b64aca26b9a793f1694d653b1cc5f2f07b857c0 Mon Sep 17 00:00:00 2001 From: paituo <330435863@qq.com> Date: Fri, 23 Aug 2024 16:57:27 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=EF=BC=8C=E5=9B=A0=E4=B8=BA=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E4=B8=8D=E6=94=AF=E6=8C=81doc=E6=A0=BC=E5=BC=8F=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...¨‹é€ ä»·åŸºç¡€çŸ¥è¯†.doc => 工程造价基础知识.docx} | Bin 141312 -> 152064 bytes 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/data/{工程造价基础知识.doc => 工程造价基础知识.docx} (64%) diff --git a/backend/data/工程造价基础知识.doc b/backend/data/工程造价基础知识.docx similarity index 64% rename from backend/data/工程造价基础知识.doc rename to backend/data/工程造价基础知识.docx index 27d4d9fe1883bb8f3090d91ce3ee7b9f018aac79..b328c09e92c9afad8878f99bb8e4291804b7db88 100644 GIT binary patch delta 47064 zcmeI534B!5y~oeJSx9060YZSVg)qhl7{ekWQj9E>2FkuEh(IF*M4*T?fN~?oJW461 zNn|KROc5!M@@PaVlu{OJk@_s=QKXbo4M=%NX&lqYqe$U>|1+7)Ofp+S0!ceJpKtCm z=bn4+`JH{cS;b*lo5OQck~CGZMJuIqDCgI2+_+)C{JfmQ?$I6fty-IZ@>58Kkzn@m zm>1*bHgWouVCzv#JN8fD4`Vv`8@#>M&_)GK7=lBF+3& zTfMW*E%9-BOqjUJ>azZE`UP{aNk84>I`0>ubo1wJ^;YKgSG5?Uh!<(j{qp{Wz?QX@ zf8vxP5H}WsIPWxwCtLz?z!hi$!Q3nkG_en7-=BRX`=RWk*pFl1l>IdJ&DdwK$J==! zF1O$ylM5}`Z#DOb*IKr=KAQ*1rC==X#QqMY%&AwjwuYQw+`7rJI4omU_V}VFZ3dlx zb3VobHA2l-3)RzVx|*w!RXS&jD(#UMnX6{2XVntGGB&Ya9Mc0*L3RD`vBJN;d04Za zd^xyMMR}0;)SbMI?4O=xyE%sAZ(`}Ucel@qI|g$tq{_9ETq^+4LtHf!*s@0PPyBuoh<0;9 zv=k2z{htHzqHLuMt2nDUk%tyl5khX7I5sYK^@yueIkdj+fQr2Y5PAJ^OKg^LXth-K z>p--UEeXVNQ{)84rSngoC63bYF6)V?KTM5PBh@IY@1-Nlw{Y)3_OmgXt|lPULQJ`L zsJvBAeD*&6--AE*z@Pga`GFYnY^=0}Iz{cvJ%b3&v>kZxvm zt+>+eZt+qH#@bDZKioy{6(Wy{%s&%4%=~T_y?>Kqy_FiQ(#6^49TT+fhA6!=IKpC8 zTYNV{sn^vVmI)jhDvQ6DIN}8!mKON&o|HD+)i&%jl z^;W&C^kq*{H{p?m5>^#w313K*%?-kMl**HxIuIn!G8ZIhDdH0Aw)=ww?QpLqlTEHp z9`EdAJs6`Noh;7u)5+%M6SbE2*j(9@+gx1f;z-`tB2QK3dnamlIb1t+qBgQilM!7i zT^qEZ^(*E#Cun!Ocyf?e7mMP4x)_soPrjd^h_Dr&EZ*(n{8$g}Pg~~f{w2$_feCM& z34KYs`_dbS)OS~g2e6GzbhoAnrmrOY4jVgc?5|ZA;izSc;ILT9Jgf%fU)~bJ zYsdV#diUtf){AQ-#ewczt7fCuRLbKK2GJI9CgqeiiegxhJ zLkc<4<(>}rlP{Ps^&$C02KO55C25+;J*_bNV#vMNfB$asfe$req$y-FcO2x!^0gpc zMOOS^30<@)B4cfPy?<~ncbik&lM8<+NBukKMKRYn`X}6wJuy%cn@tN_Y+AE)h$M_L z5DR^wA56dzlVBqh!gY{fxdpbu6|kHsKCt8UycZTcG2(08#p8_51G{PqGJ5sur3R(7 zRU^}a)M%KRrm5+i4;!jfy_zs?&0{K`5;G{R4R5LOmYTe!VSUB=eaFg&cX0CS3RgU3 zbB5Iw&Of_KAu^~a5%4PPC5PJw`{AJEc7!b*%Agz;6ZSH}0;PZc;FsHexbizQ#}DpB z&`tbz)4OWmHEwbEuGGH5A&+~BXev?^D}(l=G#w!mmclw%56R@eDKH5hgXN@%0fk)? zu!gO$D3%5LU_W%FeAgW^VJSQZ%V9h05GbVt6mCoki(*AEf|CCz$bprR7(A#X%qF3l~~biiFTM6jI?7oCei4pm5_-SQOJC1LnYhIHgjd z1kS>dc1pbu7vU1@Y9COzaVac{y$xj@C^JKJN2OXo37mys8qGqXH}rvV5XgrcQ$8$; zO@KwP81}$kCF4T z@;t*eD%#pDZeidWROz1DhGx|)KQlW%KX_HVQTFMH_xApD|$0;byW1hmN&k%*+bk|ZLjiH&&TaGUe)yDOd9mS0%g#C3EirY3@NY%a^V78gpNynI5oB?_j)Oo&{@ zKp}{S1Q-QlART7HPS^#3hCkPoyJ2^ti(+Tt9F)Ul$b60tbQr~u=P|GzHb5b4f`Gz} zNnufJFYJQ_%avLPD`6EJg<`k}m*6pj0elcpxG^a#imitYq8wNtA&ZW6$b$m-7)l`W z1yXScDBPG77R3fYD&)XQ*bH0XAiM*KE9k<3rSKdC6mD1w&!|%Jw%1myMXg9tY!|!@ zd*EF-3@6|uB)*8cPzL4DGn)=|K;>WUcy;x%Igi%mDCF7}NGa}`a&afX2LO|ihq_8M<8Lq&Bl@zZb?z=RQ zKo+ckjZg>|;SvNCZcGY`VsWcb6uLt)OoJJ)3Rc5axCXty#~3#VDBPG77RA=VdMJUj z5WSkb7}8)kY=Dgr`h7+uLqOriq_8NK3IpL|D1n?cRHUHbWoG5W$4~+hxm1%Opm4)d zxN299OIbQn6zdNIUjCqBT=#UPjpB_6@^xB%|=T1(0AgQkPcMU|IebA@>7`Ja~ zv68+eQm=OgIiFV8vRNuz=051unxCIAUhr3CvslU6O+>NtF!e{&wBT*n0}&f2nL!>D zKna|MuvY^LH?BA=ibcRYSOD3O0~g>T9N$Rl4NVKFi9t35;&5Y%!=hLYwD>XY*-#1= zn70YVq40IeW01a?JQ(8N2q@gR;;<-|01IIeL~J2-hN&fFkI(jk-20hD_uCh-*XovgV4 zeW4$$hBc7(7CjkIDg@Rta~Y>{yIVZGw{44J7KqzLiVW*v1I+k2sWKdeVz>s^A>nOW zK)^}m`%KE)m=xoF0xmO(kB9U|X`JScz}?@*YBJSc#pPz=X_6yM7L1>Qp$p5DZF)Tt8_ zhV}Km#n3u=jK`-`R$zQT<|bmf=VWSjwJL>cu(L-k6(xEehZC^n*OcvG8*GOiPy{<+ zKOBTn@1p;y_uhK-yP2~e8`iHIOU=g`HIE>!d8+sOZNEyeaviSH%AUGNYUk-^n*j2w zry%KX22A9-=TWz11lxjQE|Ke&=6@BFOemN5ep?kO3XXvlkPTtKp|b??pa6ElE;tA0 zVew&_=%DnIqX%SekM#=!;-QsW{!~4*PI5fovBjea$L9v>B|`a(PZb(CPh)qKXDiAx zlGyvzq~R^$@(@?-^XNT#kaEO%Q8V~G2Bkq?=m&dXFIHJX^e$+upY|)o6;T({(!n1B>aw=I|LMNObUx)b73BAg3WLOPQrCiM^P0L zVGZO$K;g!uuqbvCPQmIAQ4!9=1-J}XU{NvoGhBjFA<&WHQYgONt!>^17K&oA|3jxI zbUjA5H)KN&g#DgU8FYtaD1&kcDBQ5(u=lPqQS8WZ@?l8&2$kU-co(LgAYX>w`ZJ;g0K|5#<9iSt00{M_?XZ8uu1x#=! zB*I-FzdLPj&L6K0k&spn34g#ml!68Non$~2tbttE4+o+9A4!WqYJxIBW#RNu4<*w8 z&^l20vo}Rlh6YFFPuc~f=I5c}=F*u_i;ADzKMU)9M;jF-;vI!z$T&pKAmucQ!Y0@Z7vLf!{+Ub(0tz>*B87K_ zMX|5LP#6v)U@feJDIZg;fX%Q4PQhtF;a_ZhCHt8fqf_rC4tH!g>9y}v%eM(zN2??0 zw0#1!Y9FGrJst`THMhuptGe@}+?w!f?qH-SXZnPA3fo{i#Fdcs!$v5CQm{bBGt60l zin#o2`q+WCj#!^=@gw872|FCkvbW50U0%q8(l_x&%HEFZmmR*l+eNh*kPgcs3%0;k zD28M3!CCTcD2K}sNUN(e`>V1JC^$% zFDZKF!5-KPiRUSM!E(rg0sqS&E69QskOu`|?R0x>jWO?u5q?*->QvgQ$`~%mtJ~bI zK5IHd4S#An_2>h0{j6$`-* z0mpql>oe{4omcylo%?=_bNX1bhR@p_;r(qn6+I`wB*^@NmQgqc$H7!an1FL|9^U>t zZlGK%jq&P|wbG=krR zC8|d4&XxVpQgV!~-u$P+Ram~BlQ^4P&9Tu?>DY=LhYSI5KH2VkvflY*rSr*R=ack& zEu^o}oOqXB#E6LLCKY5-Jxp3}lWMs+T}?J=G3=%=Y@r^_7`O6aZl3jk@3)U|=R+zy z-;|Ryn>vVN`;+5Psoy%E>~=nR!}%oN`Q!!XlljgkkJCrk_1|5qM|^ALPe={VADRPev^EoZUR`iC?{W7 zkgqAodi=5mzpT42E4RyP?Xp6q=cwPF7==mDgo;Zds9A zR^gVFnq_rnS&>=R^mS2}!jY4@#SWnZrK7qi(nZh~T|IN~D_;p0tz^w*S!Y>R>Xp@b zWkueIS82ZiS-Ul-u!^45lHy^Nk_Jjpbx}ljr)Oo6!T+*;xF{!UwaWUevcjjV>M1LE z${LWe?xU<2D60gz#MVGsT+fxOx>8q^lQkV>RX|w@P}Yud`gI9XjxRuq#}!~*KN zNgitvo%|}OyG@I0@vv}F&bmezNvf;`Cd)<15>c{3lNMI+VR0RI7<-HCMQ99c*Zofun9M zcG#=JWcs2^Sv-HjHbqe;iOIw;W|!#dGxM2Pt!>gGXK7qw;zZ|Ph9KHKezimSllZz# zjHVHlOOQq7fpSQ&bYvbCj-nOPIE9wo8>ioe6#0uwObzyeNheuz(8YZ6F>}AQeWzG*|#xkPDvfv)gOm$79xw zJ%&$WypV&}6*D*Nr3a{V#6lioLmWq1MWnZVC!A1@$nIVKe5el^Vrp z!0Bmin47Arr-z0IKLhiI28S(zCx&V~qUvT+52u#h_%Zh@DKA;KP<3+S=I{Odsjp>E zoIG}XJee2no9Dkjk8~7AFAf@up~9w_~X*MId4l0n{MxLtW1$@Mcx?v zqI!3*M2jnAE|w^Ufe<}+v>+0tlL^Y-}L%`@!R9llM?slWqDVZ8;FSwua zfc;3V2QUXS6Q!0?45k>fiAu;6f=<&H7+sf~&0HDeF5*nOxR)lNGkdV(G$^Y3a;a^0p zelY8$1K0odof}+r-73r)7+!Vq)YpHR|NYQjf7rM&=r6zjwMWC4)iOr^=5XSHwZFOC zd-0vKpSWDztZJro3(<~EAQDV#S`!ImWUnhLi>BW%lub%M5-6h8{7 zFd<&nqT;$WLBub6sXHvFs}9ga+Faeh3&PYx1RKq)?bf=rD7f)TVaq}M-)#GTBb97^ GqxS!wcxPJx delta 31418 zcmeI*cVJD|1Hkd~US1*yf`}l92x?{9kwg+?i4kJ&6@u8c+8e88HG{-zi`skDuGLm+ z@71CxrB+jvnE8Frn|WDO+h70b<#We7_nv#kJ?Gwc?@bc7IB(nu>v;F~3aNx{uoa>( z^K$WZdV0G2(1wKqD2NSRKUXCT%o}lU$v#oTKd(lJv=)1eTQXxMgo~w06cpY1cI$iG z^SGxjFymNbZ7)2_sKlB!LMw4(QaMeTq<+(@DU-G9CCa@s$Z5uT!)KYNcJc%LuvEY8 z0DIjB*-gU<+CjJvQ^WdSv=qW$VPkc1A?gy~!&DA`c4|*VYPNG=Du=h3y0G5Qttj~x zY*~Y13FQMHA}B}66wPNS1E1n7+AD8Nk$%r|?{Y9~ZkQUj*M*vL*pcmIsX0gLLNZU~ zc3r%MmoY*^M42O&MUZiDj!YVxzh)i7>V{w$KZl!fe}+vw_4_wOo0t@r-%l}oYKY|t zNwr}U_dWJ=Qa2pXSGy2~3ZEY)L_{7T4BHuEzeDWRSQtVg-I>aYmxEzFh2odPofbm4 zGSx5FGp?WwWn?&V3{zur^+_=;%UTOC#B12x5G?n*jSmg^GE5DnZ&;@C$L6ot&$OtE z$hfS}hCFKIU`Ummep5(@zJ-O*Sn9&$*n!>V%e0>ST&^bvwNR-x?Mv`eC&f?wdC|=1 zMX!q#k&WBVnu7#Ud4*_`H>Kcwm1~OTGnG$@W`W94xpQ{+n%&9c26oBqAF-FUkG6B& zB?79b0z*rPrlA(1Il3}GFxtz@OO)4zyN4Fad7X+`*HJP=6#Je^7ni7BOuI9)U4{h< z<8C{JYV^C78F$LKc3`N3(5hn}V)`hHxHq^tz0z!-`$l;t)%voJDMdPA&NzqhPJ1W1+3>k(?8hZ1=PD1>!sj|3a z_*71fA@?uZnHIL+ENNP3@v5R};m+PD(}LM*W(yOQ-UCaH}rUkPdl{;3@6k?VMne2>%oe&l-qDYVRDwQa@ zxQDQVhl|SFMYMNOb#PI2brHRL$YWa+5H6-wb*q>HOBha+uW4kDR2sEPrAl#JprYay z$;Px;j+V;NayeS5@`=2E`EUP6$5g!0-_aIDWb$g6&b2B#XEBcQnl8j<-2361EvEmA zJ|5AvS=WfJbYxW&i&oaEztYxBO~TY|WNkn>HZ>(ntIVmb>H8UdUzNGzFm*Gfvh+u$ zgjLBB)~hL9iJ?o8E1DvD$IdUizR!NCt!k7q{{v1j)W!ik{#b~CGw8=Z5n?zVATgfP zUjmnMa7+?n0UT#iiKsbCi0cSX7Q#A3h^t7R!>MV zgK|NUozBudZn`D8Ae);uNG`}u$GBh?VzwY#rExX0ZOlSs$B=0?Mc?n4ZIttZS#&uk z!ECiMd8=pMt7)f-sZ4s7FGAzc4S%L5$+06Tnx#8>Zt9No-Hk$LR9v=3c?^7`KMo#p z9LQr~t-c;FF#Q29@fu~QS6R1-n2DEr_#DoAxxj}fym4fo5XTVxoe=#n0KVUIEsfi! zcP~p`cDvZ=W&G358m1$=jHDBASKQyC6&f3dkV74dzH;)0?dV$!Ct+>7lAC1H*(mZN zz|;(LIkW%8-X@3SDuyc`nONPK{m)&j2uE2*vVPv1PyKuuK;9xQT1h%~bN2LIiE$fY>U; zZrm}n?-dgYn-*kqGKR==rg1?w4da4royLXx7nKYf$@?jd4NSGn&O9ld2ouzI!Mot7XJ?`r#dXb)~U~ ze?|YjTZkU|HY?kyY@=uP3o-wI5EW^ojcA`y3ZuC=bdWE^huH8i8y%rjIGUlJ|8l7q zvuo(>jn|vXoZ>d70+Xi*W7V2%6h5T8Da5Qc7g;wJ$Mhwpolp#>!E7O$)r#p?HCt~x zJyRu)OjDnm<8_{Q!x(v;XIeAEWnOM!+Uvty_Yh=uDQPEr(*OiP)?7LID=Y160DYkJHF7L`H#tu^^$Ua(*v$>SXuEV4&%5@t<4?8eg79-uOX>HT@CdIsa zHLa3OB1{qLzc*!CJI9N;jJwVhuOh!#zH9etEIV0YIevf4uIy?WPE*R;ZmKKAe6j}j z(;UCS0elbHJf9#9?P#VQ5RJZ=jH%cL*@Q!B!m=5kpc$Wm^EpOKY0t}X>(JUmDdS@F z|Ds|V#-tp&b%?{nLCK-|GSnUy*Kb)LZ-3wz>siU=*fguXOi9Rk(XPAbmPaIYkmxNh z6>{B1S9?l^I{>C@jJMcOyPfeGgX@`Gw=o>y#trOcwX8fU6tS8a+3#+|ZtO)f`m7ch zhY7Gg&dD1au^Bh6a$?7AJj7!-U*m+22heo|AI0*oO>E=X^>wwg-2BOy%)n4P5vBwS zazdB%=HsAR#*MYDEo-|peVKX7V?ohh{$xGKvevNiW{Fu*XZ~W=HMAIU3Wm2yf?_&s zjE&wbdU>BJ<1TV&^zxo|mh#DQOI`ZxsrCSEz010l^*Z7NC-Rd*tUkpZ*E7`Tc_9`< zTo7W$MLGg}bcv4P3LV8yx?|veN&#PWiImE9)dl{_NVO|z?9@R@@eC(5-RbYSuy;}E z&Tk>TRHTLb9ojD#vY1L_S-nR#s~gT~atpo(8SkTHikMjI_wFHA61Klw)g9fb9L@39 z`TBX2lK6$o-CII*y3PF^^tj9A%{?J%-{&NVg()3f)FqS;NuzQ2e=U$^{lIq%{qW}R zXxhBydOeV_=yJQ_=p;l;kDa5rH`u~ORoq3Tt18h_C9Kc6%jXg`#878pRl|@r)6~k( z*|eaWvwLDO)VWwqRq-bl^chbqQijx1yDDDitc0o%pLiQ)y2|i@=g5Fa*E7ydW!J(+ zc1E%aXHbQ*8pEl+2qa@RKD)y=6+F;aq0a-pMdBeI!~HkDaU%XVddfs*wqXYCooL2Uq zUZMS$_`&J7`6uuH%KFXm((6y2-W2Uka#=w+pKMHLZR3|`d8U1f&i&1OB^PwMzS4N^ zHp^SqbAkSjwTCP|>nEJR;EkGSi6|suCceiZI6tL&;0{mtBLFR)iZ-GpGs7_g$8ZAH zX+(^QcnQV))T1V1ffxkw*>$68T3z|Kt);ES;|5(6wvRoA2$x1TTH{HO&&95sO0m}q8U1F z!kVuv-f`{7KNDx**(tqZJVeU1zY!M?8qHZAW`u)Nv6_3P^QEZGL6<0XQoiU)*1xxrpOL6K(E-lN7Bi)P$^>m01obme(p|KPlu0l^yUAxwudO{v|8M``jU2;E>ME_}9l+Ltl)C$w%K z*;IZOAo}<}O5eoPw3bE;=v6X4da@6fppHU&2GyP~^I7!Moj5H?vG&BN)~3oy#P~l> z-zS`O?_OwkWg)KiEZ!n|GU<2Fen5vPUdJxtVVAo$b9}cJJQ{QQpkU`II&}h#E-4N7T*i0z!K)R+q22^@Zq;=cw(=*IewxQ-t^lF$XU(!(ZVf;Li&x4Y)gkdH5X_1Nhp6 zR2&YZ@dpXf7V$U)yM}ZH*wlzRY%IhWcn1>~iiGfr04^akREUZA8Zlu)1cs9doNOXQ z0*9NG!eKB7+nP~M&ADQLa|?2YpRg;EGZsy=bSu7#qhJ)h71B|%HQ&q8w+)AGJ9;0K zZ!bhuG{H@D?7(*L>?p)}T*SmqT)n`jGf!APWfawg!=o$p)lG;~5Z#6N4lfbNvEQ>7 zH+C_kw-7^-hTv#Qrw`}VzMOyI7DIJmV?QB!_vdaQ(okmzIpsK*i4~MI$DeM9V-iRG z>jzaffgB9l(0?$@u>UxAB>m$%WFK;f(^LN6JIu5{UH%QJ$rMN$O)340VY6ji=5;SW z(d|Fmz=1NH_AN)5H*}C6{vBa*|8aV`MY@f37_v=_E-3r3+%h4zll%YEk><9;KN+SN zbN?qde5>u=lzW-~Y;#Y)FNc3V%uxUT#13_N$UYN>FzxZ5`RFzT%4z1Xzd510`p+%W z-0Cp5?cOeo{Qq4!wAB?`w#Q74VY2*l%VCZivfbYirnu`;ZEQ~eL}GG@Q+elHl`W?RO(fzL5$x>6YuLaRnPqOT>s5lF@Ckq{O$J7e5=jwOKNUk zM4Qx6=_{T3myJ)_YqeFZ|7uAdQ=lnK=1Lx^8N<|kVrsa;EX)w6b~D3v`dylZbr5mc z?ocLXSF3kTOq@1dD*4j`iZ)%^cA`(O43p7Ui_%4(IV@wE4PoYa)kQD5bmHSh7Ce7x z^PP5wa!Xwq=`a2$A8wEscG%(C2yI?XfmsXC5SDp|WTm8{i@UHcf!b&dQP2EN7j@<^ z!>Y303}Kn0*4~HG=0V=0;x5%*{Z{(C6f;C3`wC+|GX+K~Q`)+NRe=A>#O!)$eX)a{V5_E;i}Oq9zeF^&~dJ9mTYvdG6m_ z+!ge(m~Z#yQZr;wn+aL}a-i|#;f|(QU707z+)I4jpaJBRTV%fl*LM{#W9@1YpBU%3q)Zw{0T|Z{R&=kp-k5yO)Uk-vGbVN7I!W`_v*Eoxdh_d8;B`n2i z?8gBVA-5$^9+fa0A0Qqxkv|WWR3s1gMo|Wj@EmH2zzY2^1PaDu6Lz3XULh(V7Q^u~ z?%*-f@EkAj3a`;NADO^x%)va&$5w^WHtfJoq#+%7@{>BmVmQWPBIe>Vti)REz;0Z^ zRXo8fXl$rPOu#fGA{nKr*D~-%Z73rcjlu*>!emTCB4!~ObFdIgu?gD{SWt+@=#3bp zU>@$`H`o>8YZiuMG-hKi3e(PuArh@^ssDIpW@0H;;tVd}25!TRj<^Pb5RCJqm59sG)4>syQ605WAB_=?_UMA1 z=!*|94pR`1WX!`tEX6*2jR#0Y8lFLRrz#YKJ*q2=T+tZ8h(a55Lr?U>5RAeHn1Mvh z#pl?BZTKEXaT|B=6t9raiGBo*D2*UAL^vX#jAt|f6Y&uyVKSy*Dpq41HexgOU@y+$ zB5vVVR4hm7!4oyn2Ltd4;;{znu?gGo8Wzsff4DP8IYwX%K0y+Gz>m0uEBFaN;}_h8 zPkBzG7=+=NhlSXVukam?!p?cJO^AEO{NLT7Zx0F1}SSb}vpgj2YK2lx%* zJNM&f+F+;Z9}h|1LA>u&BcGQWSzM3d0qi2t)|N&>U^i z5uMNtahQmy_!u(~j|3!QCX$hY*_exk_(EZ{7+>RC9L5ox#3@|BJyfnrV&Dru_#+7Y zF#uyR88a{o3$X-Cu?)+x8tbtMn{f)tX-0Q(4|dh)nNSU$@J4O;!VdumL^NXXKE~if zC|Hg)Scmoa88>hfx9|ur@EYpslz4UOzZf%J&=Y+TgZ>zRSPa4t48<_Kk40FD6KS&VMr77BUNaiAp1pgby}2I`;@!VrxZjK?HQ zM?5~mBCNwk9Ktc2!v);JuSms1Jn^LdpEFa=i|!VcP!)CIi|*)&5g3gzh{JTmV;1J% z3oOGn?7}gez)76OWn9Be{EFvz0Sj;Hzlb+SJseO9W#Nq)sEc}NfItKz4DHbo127nq zFbzAf3qRo+Zs0B~YtTKT6iOo!tua_(G!$bo4(D(Q_wW#+CRZt_j4JSfFM1;yLopIF zF$b4$1=o>|g0<+(V25HTjuI#fCnyydRYVn3g$L@R0Rj+(Hb}vISk>l0g9A#SG|Hkp zTu>2};0iZ*z!P;)7Xb)FIGWU^{$rULi1#61(ix30_!#nqodhHz8FMffpJD}8Vhz?} z9X4Pawqpl&VITJ6YkZ3XIEX_yT$}p;k(o=lj4SvVH*gcb;1QnSDbi4|4(A3GK~Z?X z8#Pc9!3aY$v_?mCK@arCP`re`F_+1oWdEL!zKKLtGJF@ z^{89;z!!mNgvJO#FZ4kS`eP`DV@*Bke=Rdda12k723sGB7p`!FC%h1XPz=Bz48|~w zz(|b22bhS-n2PC`flrWtB&1+AKJ%ge7c%n&mS7o{V2}P#IO>3O9Jc8#PfI_27#D1fdZcBLtxc zM-wzfbF@S&w1a%jtqbIPRk))z>Yz2+pdC748m9Xzq?(y{?8QDD$0_`QCn(T>Hjlz60(&^11WLjk9%zWh z_y8YbF6QG1jv^Hg@d%G$9l#Z^!l(i&Ap%V?9uu(u3-KF%hif3m0|F6*6wJpV9Klf> z!*581Wf1p~V2i>i278o2S$IRK!Ke=EqBZ1ei~TSFA40)oe2f`Lz#6Q@K^(??q{5;h zCo2?1F_eJ|f)RpdXpUuAj+G6m|5eO9z$3hdd<3upE3pxqaTV8a2X|4hF&8UvM-4PU zAX*?2qwxX8V*;jOI_6>?=HpW=Y)t(xW@a0m zmsThWdo0B&Y{d?o!v&OX&J_>p!3PZy+?@I!%*+tX!F<@a;4%s2-~vDRqa(Uu1`@Cy z8?g&}unz}t1=r9ulJ&6@yYURqVbhX&M+hPmM#C@y>#+e(@B*q<9C#=UdpM#j8lf@f z;&Uv)a;(B?tif7r$1Z%0Z*UkVa0X}b6O^lrZs8vOz!N-08lK|?UO_~WAZTEPJSYHL z*ufsf;Rsi_!5to`jXJ1{dZ-Uy1V>T-AVOX6(Rj?1`fO`5jZ@AP(UyF5(t$;~^g5HRL=FY?0%#o>UGD237}gAVA3&gg<} z=#JiqMjymr4(4Jt)?gDhV;8={etZq(ETi*Chq^5}Kr1w9$H@poF${^Ai4@GmA}qru z$hXKV;XPDFH8eycgd+ki(F$$Q7VXf!J@wz2nXc%8p6HJO7=~Z)EAHb3UZQ#j4lsDa z3*M-SAT&f6!qE)P(F*O*9-S}}qp$?aI#B-ynK=ZHjzowMgrYa(`|p==4exa#cW8hB z48s_V$45AbL%4_^aS4}k4cBo85AYC=VAYvwR~Xr#0E(d`s=yufP#@zk9tlXse0+ut z*o;Fsic7eRS4f9P7fK1O&>A;z6K-7zLpYQsjH1vU9ncwZ7>_BKiX~W%)%X&}aSGS) zGj8EF?&2PP$7B3~G(5*kICtY%K^JsI-)_`@3^NIsi8=TbTd)-;aRzsB59v^MrwO7c z8Y2`<5QTo|j|3zl87Vk`gE)rsc!bAz(Osc!GgG$*#f=~|L zh+u?Z0w!WA;*pFL%)#f_if!1A9oUIo_)1~48&B~ZQN3v2=z-qohyECdK^Tf*n2A|f zhb`ERJ=l-0aSb9FoiKLa#V{mcR$uD>3uYF>HHIRAFZ>XQAhbXv`ePsxk%Y6j zfCBxvAVoD)hX*|2g_@|1x(GuAx}z6jF+^eXK1M;lg0vZXuotIs4!7|u((nrE{+uUZ z1AFvBAB@6i%)@6mjuUu_XDBm(=7#|of{{>0Gn$Q0@fG&tdmO?gT*Fc1Z|2}s6${D#-C7(^39Q?x>BjKNq; zLp+Y)I4qGqV6IuoJs+3-_QILWC#^2RNe= zYM~zL!xw>Qj4*^F3at^1KIn^n7>Geo5QlMcW;`ZgGBiWkA*!Mpc3}^G#T^tGMirnq z98d~QsD!E*j3F3>(NGYFiI|E{FcUkl2g;9(uHYqJ!F4!CFlwMS{Lm0XFcgch1P5^n zr*Q@sa1obr1=o>^N3eXKjt2!$7-iuM@As+yTFm%iAr@m5*5Vw_qv8loyYNAMdiQgDYC8Xy3T&=|o8K@?h}JvtyB zGhr$6ygTKuZ2UKDtNf+Z|MdhY?_P9&edp|3`3rUTcO>Ay`u}$i!2jz1?>LIzJ&ym{ z|G(P-{n!8hyC|9`Ut{%imLwf}#!1pYh!|9AfX_ey}DKl%4|uI={!JOBSv zXFtKrGTf*_N;&R!BX4A~2an!9!Vj!2+P4Q`4s1I5tp#dlPBPC^X9HM3pdF%go64v<&ZMmG&iKxWfw`e%$XxUVrKo#o>t6 zScfgxfyxa?40@s$KEfnC!E@LKa32|`1Iay{gD4UCdd)T5fNdl0?;-@D=!*d;+L(L3 zXpCTlA`Gn%g*Bl(-A2JM*2OqXLNeySC!B*7;b?~R2<|gC<$#4!nbAp{hGjEuKBFh1 z5#OAr9k7ce*N}hF>^D?yNj>7jR@{b#O>1tYA`J3LxO@U0g|*m#wrz9|t~G7B4~*|| z2n*X$|MEfdDy+tX_8ggb4Hb`Mi=ZebgGq$*e6ShZyK{3H z{s=^s9y|xYN0@?r_y(!H6z)*>qKWk8zAqXg7)mt7h?6*nsys+?g?#Wd4PE>45CKbK zc)0*Ou^Zp@BWLK$@#`( zGy5QiNHedKaXI6>*IULX*OPeuVxEAE%h}Cm{LD)qNp{nW>&f}d>*&LkEV(yqm~o?w z^Iq9R_Gdpc#h0o6vx`WsoRM5d4)P~Odi9 zlD}YJo7kqEmut(<SiX1q*%=t8+?;Fm=Te^{nDmA6ckXR>Fe6 z?WR&$$T&qoYul6sq3U8vBpdc;AH&E9CpJpk!+Np<&)V2aw9Kyf7?!bRQf$B(~R3r`EB9dpLQx z^K(l6d@ejcTqf7m&R03U;4Xg#RXe|H>;(QKYSJfpOQ#&;VHJSRgR^#Ys$6tZ1th?^Vhn+o$QdP3CyQrSvy!HHPMtUYFoU5 z_HWo0Vf8q<1Fo417jR4JVp$|uFgd_!7x?8;@P}n9t*M#PmEligjQ(UDqau2fR z800Ueh^4z4D4NnC_N9C9P!&mPP{1q6U87O;;y0& Date: Mon, 26 Aug 2024 09:54:33 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=AF=AF=E4=B8=8A?= =?UTF-8?q?=E4=BC=A0=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/18æ—¶33分46ç§’_副本___init__.py | 0 backend/app/18æ—¶33分46ç§’_副本_llmhub.py | 61 ---- .../app/18æ—¶33分46ç§’_副本_observability.py | 20 -- backend/app/18æ—¶33分46ç§’_副本_settings.py | 235 --------------- backend/app/api/18æ—¶33分47ç§’_副本___init__.py | 0 .../api/routers/18æ—¶33分49ç§’_副本___init__.py | 0 .../app/api/routers/18æ—¶33分49ç§’_副本_chat.py | 150 ---------- .../api/routers/18æ—¶33分49ç§’_副本_events.py | 149 ---------- .../api/routers/18æ—¶33分49ç§’_副本_models.py | 253 ---------------- .../api/routers/18æ—¶33分49ç§’_副本_upload.py | 25 -- .../18æ—¶33分49ç§’_副本_vercel_response.py | 109 ------- .../api/services/18æ—¶33分49ç§’_副本_file.py | 113 -------- .../services/18æ—¶33分49ç§’_副本_llama_cloud.py | 114 -------- .../services/18æ—¶33分49ç§’_副本_suggestion.py | 48 ---- backend/app/engine/18æ—¶33分46ç§’_副本_index.py | 22 -- .../app/engine/18æ—¶33分47ç§’_副本___init__.py | 61 ---- .../app/engine/18æ—¶33分47ç§’_副本_constants.py | 1 - .../app/engine/18æ—¶33分47ç§’_副本_engine.py | 108 ------- .../app/engine/18æ—¶33分47ç§’_副本_generate.py | 94 ------ .../app/engine/18æ—¶33分47ç§’_副本_prompt.py | 93 ------ .../app/engine/18æ—¶33分47ç§’_副本_vectordb.py | 71 ----- .../loaders/18æ—¶33分48ç§’_副本___init__.py | 40 --- .../engine/loaders/18æ—¶33分48ç§’_副本_db.py | 140 --------- .../engine/loaders/18æ—¶33分48ç§’_副本_file.py | 88 ------ .../engine/loaders/18æ—¶33分48ç§’_副本_web.py | 37 --- .../18æ—¶33分48ç§’_副本_CHBM25Retriever.py | 133 --------- .../retriever/18æ—¶33分48ç§’_副本_CHTokener.py | 46 --- .../18æ—¶33分49ç§’_副本_HybridRetriever.py | 67 ----- .../tools/18æ—¶33分47ç§’_副本_duckduckgo.py | 36 --- .../tools/18æ—¶33分48ç§’_副本___init__.py | 60 ---- .../engine/tools/18æ—¶33分48ç§’_副本_img_gen.py | 108 ------- .../tools/18æ—¶33分48ç§’_副本_interpreter.py | 143 --------- .../tools/18æ—¶33分48ç§’_副本_openapi_action.py | 78 ----- .../engine/tools/18æ—¶33分48ç§’_副本_weather.py | 73 ----- .../xinference/18æ—¶33分46ç§’_副本___init__.py | 0 .../app/xinference/18æ—¶33分46ç§’_副本_base.py | 272 ------------------ 36 files changed, 3048 deletions(-) delete mode 100644 backend/app/18æ—¶33分46ç§’_副本___init__.py delete mode 100644 backend/app/18æ—¶33分46ç§’_副本_llmhub.py delete mode 100644 backend/app/18æ—¶33分46ç§’_副本_observability.py delete mode 100644 backend/app/18æ—¶33分46ç§’_副本_settings.py delete mode 100644 backend/app/api/18æ—¶33分47ç§’_副本___init__.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本___init__.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本_chat.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本_events.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本_models.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本_upload.py delete mode 100644 backend/app/api/routers/18æ—¶33分49ç§’_副本_vercel_response.py delete mode 100644 backend/app/api/services/18æ—¶33分49ç§’_副本_file.py delete mode 100644 backend/app/api/services/18æ—¶33分49ç§’_副本_llama_cloud.py delete mode 100644 backend/app/api/services/18æ—¶33分49ç§’_副本_suggestion.py delete mode 100644 backend/app/engine/18æ—¶33分46ç§’_副本_index.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本___init__.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本_constants.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本_engine.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本_generate.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本_prompt.py delete mode 100644 backend/app/engine/18æ—¶33分47ç§’_副本_vectordb.py delete mode 100644 backend/app/engine/loaders/18æ—¶33分48ç§’_副本___init__.py delete mode 100644 backend/app/engine/loaders/18æ—¶33分48ç§’_副本_db.py delete mode 100644 backend/app/engine/loaders/18æ—¶33分48ç§’_副本_file.py delete mode 100644 backend/app/engine/loaders/18æ—¶33分48ç§’_副本_web.py delete mode 100644 backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHBM25Retriever.py delete mode 100644 backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHTokener.py delete mode 100644 backend/app/engine/retriever/18æ—¶33分49ç§’_副本_HybridRetriever.py delete mode 100644 backend/app/engine/tools/18æ—¶33分47ç§’_副本_duckduckgo.py delete mode 100644 backend/app/engine/tools/18æ—¶33分48ç§’_副本___init__.py delete mode 100644 backend/app/engine/tools/18æ—¶33分48ç§’_副本_img_gen.py delete mode 100644 backend/app/engine/tools/18æ—¶33分48ç§’_副本_interpreter.py delete mode 100644 backend/app/engine/tools/18æ—¶33分48ç§’_副本_openapi_action.py delete mode 100644 backend/app/engine/tools/18æ—¶33分48ç§’_副本_weather.py delete mode 100644 backend/app/xinference/18æ—¶33分46ç§’_副本___init__.py delete mode 100644 backend/app/xinference/18æ—¶33分46ç§’_副本_base.py diff --git a/backend/app/18æ—¶33分46ç§’_副本___init__.py b/backend/app/18æ—¶33分46ç§’_副本___init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/app/18æ—¶33分46ç§’_副本_llmhub.py b/backend/app/18æ—¶33分46ç§’_副本_llmhub.py deleted file mode 100644 index 69e0e32..0000000 --- a/backend/app/18æ—¶33分46ç§’_副本_llmhub.py +++ /dev/null @@ -1,61 +0,0 @@ -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.core.settings import Settings -from typing import Dict -import os - -DEFAULT_MODEL = "gpt-3.5-turbo" -DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large" - -class TSIEmbedding(OpenAIEmbedding): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._query_engine = self._text_engine = self.model_name - -def llm_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_TEMPERATURE - - model = os.getenv("MODEL", DEFAULT_MODEL) - temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - max_tokens = os.getenv("LLM_MAX_TOKENS") - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model": model, - "api_key": api_key, - "api_base": api_base, - "temperature": float(temperature), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - return config - - -def embedding_config_from_env() -> Dict: - from llama_index.core.constants import DEFAULT_EMBEDDING_DIM - - model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) - dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM) - api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") - api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") - - config = { - "model_name": model, - "dimension": int(dimension) if dimension is not None else None, - "api_key": api_key, - "api_base": api_base, - } - return config - -def init_llmhub(): - from llama_index.llms.openai_like import OpenAILike - - llm_configs = llm_config_from_env() - embedding_configs = embedding_config_from_env() - - Settings.embed_model = TSIEmbedding(**embedding_configs) - Settings.llm = OpenAILike( - **llm_configs, - is_chat_model=True, - is_function_calling_model=False, - context_window=4096, - ) \ No newline at end of file diff --git a/backend/app/18æ—¶33分46ç§’_副本_observability.py b/backend/app/18æ—¶33分46ç§’_副本_observability.py deleted file mode 100644 index 780ae04..0000000 --- a/backend/app/18æ—¶33分46ç§’_副本_observability.py +++ /dev/null @@ -1,20 +0,0 @@ -import os - -import llama_index.core - -def init_observability(): - - PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY") - if not PHOENIX_API_KEY: - raise ValueError("PHOENIX_API_KEY environment variable is not set") - os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}" - PHOENIX_URL = os.getenv("PHOENIX_URL") - llama_index.core.set_global_handler( - "arize_phoenix", endpoint=PHOENIX_URL, eval_params={} - ) - - #debugHandle=[] - # llama_debug = LlamaDebugHandler(print_trace_on_end=True) - # debugHandle.append(llama_debug) - # callback_manager = CallbackManager(debugHandle) - # settings.Settings.callback_manager = callback_manager diff --git a/backend/app/18æ—¶33分46ç§’_副本_settings.py b/backend/app/18æ—¶33分46ç§’_副本_settings.py deleted file mode 100644 index 58333d9..0000000 --- a/backend/app/18æ—¶33分46ç§’_副本_settings.py +++ /dev/null @@ -1,235 +0,0 @@ -import os -from typing import Dict - -from llama_index.core.constants import DEFAULT_TEMPERATURE -from llama_index.core.settings import Settings -from llama_index.llms.xinference import Xinference -from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP - -from app.xinference.base import XinferenceEmbedding, XinferenceRerank - - -def get_node_postprocessors(): - rerank_enabled = os.getenv("RERANK_ENABLED").title() - if rerank_enabled is None or rerank_enabled == 'False': - return [] - - rerank_model = os.getenv("RERANK_MODEL") - rerank_url = os.getenv("RERANK_BASE_URL") - rerank_top_n = os.getenv("RERANK_TOP_N") - rerank_threshold = os.getenv("RERANK_THRESHOLD") - postprocess = None - if rerank_model is not None: - postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)] - return postprocess - -def init_settings(): - model_provider = os.getenv("MODEL_PROVIDER") - match model_provider: - case "openai": - init_openai() - case "dashscope": - init_dashscope() - case "groq": - init_groq() - case "ollama": - init_ollama() - case "anthropic": - init_anthropic() - case "gemini": - init_gemini() - case "mistral": - init_mistral() - case "azure-openai": - init_azure_openai() - case "t-systems": - from .llmhub import init_llmhub - init_llmhub() - case "xinference": - init_xinference() - case _: - raise ValueError(f"Invalid model provider: {model_provider}") - - Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024")) - Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20")) - - -def init_ollama(): - # from llama_index.embeddings.ollama import OllamaEmbedding - # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama - # - # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" - # request_timeout = float( - # os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) - # ) - # Settings.embed_model = OllamaEmbedding( - # base_url=base_url, - # model_name=os.getenv("EMBEDDING_MODEL"), - # ) - # Settings.llm = Ollama( - # base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout - # ) - pass - -def init_xinference(): - base_url = os.getenv("BASE_URL") - model = os.getenv("MODEL") - max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None - temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP)) - - Settings.llm = Xinference(model, base_url, temperature, max_tokens) - - embedding_base_url = os.getenv("EMBEDDING_BASE_URL") - embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url - - embed_model_name = os.getenv("EMBEDDING_MODEL") - dimensions = os.getenv("EMBEDDING_DIM") - dimensions = int(dimensions) if dimensions is not None else None - Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions) - -def init_openai(): - from llama_index.core.constants import DEFAULT_TEMPERATURE - from llama_index.embeddings.openai import OpenAIEmbedding - from llama_index.llms.openai import OpenAI - - max_tokens = os.getenv("LLM_MAX_TOKENS") - config = { - "model": os.getenv("MODEL"), - "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - Settings.llm = OpenAI(**config) - - dimensions = os.getenv("EMBEDDING_DIM") - config = { - "model": os.getenv("EMBEDDING_MODEL"), - "dimensions": int(dimensions) if dimensions is not None else None, - } - Settings.embed_model = OpenAIEmbedding(**config) - -def init_dashscope(): - from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels - from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels - - max_tokens = os.getenv("LLM_MAX_TOKENS") - config = { - "model": os.getenv("MODEL"), - "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX) - - dimensions = os.getenv("EMBEDDING_DIM") - config = { - "model": os.getenv("EMBEDDING_MODEL"), - "dimensions": int(dimensions) if dimensions is not None else None, - } - Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2, - text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY) - - -def init_azure_openai(): - # from llama_index.core.constants import DEFAULT_TEMPERATURE - # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding - # from llama_index.llms.azure_openai import AzureOpenAI - # - # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] - # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] - # max_tokens = os.getenv("LLM_MAX_TOKENS") - # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - # dimensions = os.getenv("EMBEDDING_DIM") - # - # azure_config = { - # "api_key": os.environ["AZURE_OPENAI_KEY"], - # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], - # "api_version": os.getenv("AZURE_OPENAI_API_VERSION") - # or os.getenv("OPENAI_API_VERSION"), - # } - # - # Settings.llm = AzureOpenAI( - # model=os.getenv("MODEL"), - # max_tokens=int(max_tokens) if max_tokens is not None else None, - # temperature=float(temperature), - # deployment_name=llm_deployment, - # **azure_config, - # ) - # - # Settings.embed_model = AzureOpenAIEmbedding( - # model=os.getenv("EMBEDDING_MODEL"), - # dimensions=int(dimensions) if dimensions is not None else None, - # deployment_name=embedding_deployment, - # **azure_config, - # ) - pass - - -def init_fastembed(): - """ - Use Qdrant Fastembed as the local embedding provider. - """ - # from llama_index.embeddings.fastembed import FastEmbedEmbedding - # - # embed_model_map: Dict[str, str] = { - # # Small and multilingual - # "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", - # # Large and multilingual - # "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 - # } - # - # # This will download the model automatically if it is not already downloaded - # Settings.embed_model = FastEmbedEmbedding( - # model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] - # ) - pass - - -def init_groq(): - # from llama_index.llms.groq import Groq - # - # model_map: Dict[str, str] = { - # "llama3-8b": "llama3-8b-8192", - # "llama3-70b": "llama3-70b-8192", - # "mixtral-8x7b": "mixtral-8x7b-32768", - # } - # - # Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) - # # Groq does not provide embeddings, so we use FastEmbed instead - # init_fastembed() - pass - - -def init_anthropic(): - # from llama_index.llms.anthropic import Anthropic - # - # model_map: Dict[str, str] = { - # "claude-3-opus": "claude-3-opus-20240229", - # "claude-3-sonnet": "claude-3-sonnet-20240229", - # "claude-3-haiku": "claude-3-haiku-20240307", - # "claude-2.1": "claude-2.1", - # "claude-instant-1.2": "claude-instant-1.2", - # } - # - # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) - # # Anthropic does not provide embeddings, so we use FastEmbed instead - # init_fastembed() - pass - - -def init_gemini(): - # from llama_index.embeddings.gemini import GeminiEmbedding - # from llama_index.llms.gemini import Gemini - # - # model_name = f"models/{os.getenv('MODEL')}" - # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" - # - # Settings.llm = Gemini(model=model_name) - # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) - pass - -def init_mistral(): - # from llama_index.embeddings.mistralai import MistralAIEmbedding - # from llama_index.llms.mistralai import MistralAI - # - # Settings.llm = MistralAI(model=os.getenv("MODEL")) - # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) - pass \ No newline at end of file diff --git a/backend/app/api/18æ—¶33分47ç§’_副本___init__.py b/backend/app/api/18æ—¶33分47ç§’_副本___init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本___init__.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本___init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本_chat.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本_chat.py deleted file mode 100644 index 6476567..0000000 --- a/backend/app/api/routers/18æ—¶33分49ç§’_副本_chat.py +++ /dev/null @@ -1,150 +0,0 @@ -import logging -import os -from typing import List - -from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, status -from llama_index.core.chat_engine.types import BaseChatEngine, NodeWithScore -from llama_index.core.llms import MessageRole -from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters - -from app.api.routers.events import EventCallbackHandler -from app.api.routers.models import ( - ChatConfig, - ChatData, - Message, - Result, - SourceNodes, -) -from app.api.routers.vercel_response import VercelStreamResponse -from app.api.services.llama_cloud import LLamaCloudFileService -from app.engine import get_chat_engine - -chat_router = r = APIRouter() - -logger = logging.getLogger("uvicorn") - - -def process_response_nodes( - nodes: List[NodeWithScore], - background_tasks: BackgroundTasks, -): - """ - Start background tasks on the source nodes if needed. - """ - files_to_download = SourceNodes.get_download_files(nodes) - for file in files_to_download: - background_tasks.add_task( - LLamaCloudFileService.download_llamacloud_pipeline_file, file - ) - - -# streaming endpoint - delete if not needed -@r.post("") -async def chat( - request: Request, - data: ChatData, - background_tasks: BackgroundTasks, - chat_engine: BaseChatEngine = Depends(get_chat_engine), -): - try: - last_message_content = data.get_last_message_content() - # ç”±äºŽåŸºäºŽåŽ†å²æ¶ˆæ¯çš„æç¤ºè¯æ²¡æœ‰è°ƒæ•´å¥½ï¼Œæ‰€ä»¥æš‚æ—¶å±è”½åކ岿¶ˆæ¯ - data.messages.clear() - messages = data.get_history_messages() - - doc_ids = data.get_chat_document_ids() - filters = generate_filters(doc_ids) - params = data.data or {} - logger.info("Creating chat engine with filters", filters.dict()) - chat_engine = get_chat_engine(filters=filters, params=params) - - event_handler = EventCallbackHandler() - chat_engine.callback_manager.handlers.append(event_handler) # type: ignore - - response = await chat_engine.astream_chat(last_message_content, messages) - process_response_nodes(response.source_nodes, background_tasks) - - return VercelStreamResponse(request, event_handler, response, data) - except Exception as e: - logger.exception("Error in chat engine", exc_info=True) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Error in chat engine: {e}", - ) from e - - -def generate_filters(doc_ids): - if len(doc_ids) > 0: - filters = MetadataFilters( - filters=[ - MetadataFilter( - key="private", - value=["true"], - operator="nin", # type: ignore - ), - MetadataFilter( - key="doc_id", - value=doc_ids, - operator="in", # type: ignore - ), - ], - condition="or", # type: ignore - ) - else: - filters = MetadataFilters( - # Use the "NIN" - "not in" operator to include all public documents (don't have the private key set) - filters=[ - MetadataFilter( - key="private", - value=["true"], - operator="nin", # type: ignore - ), - ] - ) - - return filters - - -# non-streaming endpoint - delete if not needed -@r.post("/request") -async def chat_request( - data: ChatData, - chat_engine: BaseChatEngine = Depends(get_chat_engine), -) -> Result: - last_message_content = data.get_last_message_content() - messages = data.get_history_messages() - - response = await chat_engine.achat(last_message_content, messages) - return Result( - result=Message(role=MessageRole.ASSISTANT, content=response.response), - nodes=SourceNodes.from_source_nodes(response.source_nodes), - ) - - -@r.get("/config") -async def chat_config() -> ChatConfig: - starter_questions = None - conversation_starters = os.getenv("CONVERSATION_STARTERS") - if conversation_starters and conversation_starters.strip(): - starter_questions = conversation_starters.strip().split("\\n") - return ChatConfig(starter_questions=starter_questions) - - -@r.get("/config/llamacloud") -async def chat_llama_cloud_config(): - projects = LLamaCloudFileService.get_all_projects_with_pipelines() - pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME") - project = os.getenv("LLAMA_CLOUD_PROJECT_NAME") - pipeline_config = ( - pipeline - and project - and { - "pipeline": pipeline, - "project": project, - } - or None - ) - return { - "projects": projects, - "pipeline": pipeline_config, - } diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本_events.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本_events.py deleted file mode 100644 index a1d2ea8..0000000 --- a/backend/app/api/routers/18æ—¶33分49ç§’_副本_events.py +++ /dev/null @@ -1,149 +0,0 @@ -import json -import asyncio -import logging -from typing import AsyncGenerator, Dict, Any, List, Optional -from llama_index.core.callbacks.base import BaseCallbackHandler -from llama_index.core.callbacks.schema import CBEventType -from llama_index.core.tools.types import ToolOutput -from pydantic import BaseModel - - -logger = logging.getLogger(__name__) - - -class CallbackEvent(BaseModel): - event_type: CBEventType - payload: Optional[Dict[str, Any]] = None - event_id: str = "" - - def get_retrieval_message(self) -> dict | None: - if self.payload: - nodes = self.payload.get("nodes") - if nodes: - msg = f"æ ¹æ®æŸ¥è¯¢æ£€ç´¢åˆ° {len(nodes)} æºæ–‡ä»¶" - else: - msg = f"查询检索中: '{self.payload.get('query_str')}'" - return { - "type": "events", - "data": {"title": msg}, - } - else: - return None - - def get_tool_message(self) -> dict | None: - func_call_args = self.payload.get("function_call") - if func_call_args is not None and "tool" in self.payload: - tool = self.payload.get("tool") - return { - "type": "events", - "data": { - "title": f"调用工具 {tool.name} ï¼Œå‚æ•°: {func_call_args}", - }, - } - - def _is_output_serializable(self, output: Any) -> bool: - try: - json.dumps(output) - return True - except TypeError: - return False - - def get_agent_tool_response(self) -> dict | None: - response = self.payload.get("response") - if response is not None: - sources = response.sources - for source in sources: - # Return the tool response here to include the toolCall information - if isinstance(source, ToolOutput): - if self._is_output_serializable(source.raw_output): - output = source.raw_output - else: - output = source.content - - return { - "type": "tools", - "data": { - "toolOutput": { - "output": output, - "isError": source.is_error, - }, - "toolCall": { - "id": None, # There is no tool id in the ToolOutput - "name": source.tool_name, - "input": source.raw_input, - }, - }, - } - - def to_response(self): - try: - match self.event_type: - case "retrieve": - return self.get_retrieval_message() - case "function_call": - return self.get_tool_message() - case "agent_step": - return self.get_agent_tool_response() - case _: - return None - except Exception as e: - logger.error(f"转æ¢å›žåº”æ—¶é—´æ—¶å‘生错误,原因: {e}") - return None - - -class EventCallbackHandler(BaseCallbackHandler): - _aqueue: asyncio.Queue - is_done: bool = False - - def __init__( - self, - ): - """Initialize the base callback handler.""" - ignored_events = [ - CBEventType.CHUNKING, - CBEventType.NODE_PARSING, - CBEventType.EMBEDDING, - CBEventType.LLM, - CBEventType.TEMPLATING, - ] - super().__init__(ignored_events, ignored_events) - self._aqueue = asyncio.Queue() - - def on_event_start( - self, - event_type: CBEventType, - payload: Optional[Dict[str, Any]] = None, - event_id: str = "", - **kwargs: Any, - ) -> str: - event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload) - if event.to_response() is not None: - self._aqueue.put_nowait(event) - - def on_event_end( - self, - event_type: CBEventType, - payload: Optional[Dict[str, Any]] = None, - event_id: str = "", - **kwargs: Any, - ) -> None: - event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload) - if event.to_response() is not None: - self._aqueue.put_nowait(event) - - def start_trace(self, trace_id: Optional[str] = None) -> None: - """No-op.""" - - def end_trace( - self, - trace_id: Optional[str] = None, - trace_map: Optional[Dict[str, List[str]]] = None, - ) -> None: - """No-op.""" - - async def async_event_gen(self) -> AsyncGenerator[CallbackEvent, None]: - while not self._aqueue.empty() or not self.is_done: - try: - yield await asyncio.wait_for(self._aqueue.get(), timeout=0.1) - except asyncio.TimeoutError: - pass diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本_models.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本_models.py deleted file mode 100644 index b064218..0000000 --- a/backend/app/api/routers/18æ—¶33分49ç§’_副本_models.py +++ /dev/null @@ -1,253 +0,0 @@ -import logging -import os -from typing import Any, Dict, List, Literal, Optional, Set - -from llama_index.core.llms import ChatMessage, MessageRole -from llama_index.core.schema import NodeWithScore -from pydantic import BaseModel, Field, validator, field_validator -from pydantic.alias_generators import to_camel - -logger = logging.getLogger("uvicorn") - - -class FileContent(BaseModel): - type: Literal["text", "ref"] - # If the file is pure text then the value is be a string - # otherwise, it's a list of document IDs - value: str | List[str] - - -class File(BaseModel): - id: str - content: FileContent - filename: str - filesize: int - filetype: str - - -class AnnotationFileData(BaseModel): - files: List[File] = Field( - default=[], - description="List of files", - ) - - class Config: - json_schema_extra = { - "example": { - "csvFiles": [ - { - "content": "Name, Age\nAlice, 25\nBob, 30", - "filename": "example.csv", - "filesize": 123, - "id": "123", - "type": "text/csv", - } - ] - } - } - alias_generator = to_camel - - -class Annotation(BaseModel): - type: str - data: AnnotationFileData | List[str] - - def to_content(self) -> str | None: - if self.type == "document_file": - # We only support generating context content for CSV files for now - csv_files = [file for file in self.data.files if file.filetype == "csv"] - if len(csv_files) > 0: - return "Use data from following CSV raw content\n" + "\n".join( - [f"```csv\n{csv_file.content.value}\n```" for csv_file in csv_files] - ) - else: - logger.warning( - f"The annotation {self.type} is not supported for generating context content" - ) - return None - - -class Message(BaseModel): - role: MessageRole - content: str - annotations: List[Annotation] | None = None - - -class ChatData(BaseModel): - messages: List[Message] - data: Any = None - - class Config: - json_schema_extra = { - "example": { - "messages": [ - { - "role": "user", - "content": "What standards for letters exist?", - } - ] - } - } - - @field_validator("messages") - def messages_must_not_be_empty(cls, v): - if len(v) == 0: - raise ValueError("Messages must not be empty") - return v - - def get_last_message_content(self) -> str: - """ - Get the content of the last message along with the data content if available. - Fallback to use data content from previous messages - """ - if len(self.messages) == 0: - raise ValueError("There is not any message in the chat") - last_message = self.messages[-1] - message_content = last_message.content - for message in reversed(self.messages): - if message.role == MessageRole.USER and message.annotations is not None: - annotation_contents = filter( - None, - [annotation.to_content() for annotation in message.annotations], - ) - if not annotation_contents: - continue - annotation_text = "\n".join(annotation_contents) - message_content = f"{message_content}\n{annotation_text}" - break - return message_content - - def get_history_messages(self) -> List[ChatMessage]: - """ - Get the history messages - """ - return [ - ChatMessage(role=message.role, content=message.content) - for message in self.messages[:-1] - ] - - def is_last_message_from_user(self) -> bool: - return self.messages[-1].role == MessageRole.USER - - def get_chat_document_ids(self) -> List[str]: - """ - Get the document IDs from the chat messages - """ - document_ids: List[str] = [] - for message in self.messages: - if message.role == MessageRole.USER and message.annotations is not None: - for annotation in message.annotations: - if ( - annotation.type == "document_file" - and annotation.data.files is not None - ): - for fi in annotation.data.files: - if fi.content.type == "ref": - document_ids += fi.content.value - return list(set(document_ids)) - - -class LlamaCloudFile(BaseModel): - file_name: str - pipeline_id: str - - def __eq__(self, other): - if not isinstance(other, LlamaCloudFile): - return NotImplemented - return ( - self.file_name == other.file_name and self.pipeline_id == other.pipeline_id - ) - - def __hash__(self): - return hash((self.file_name, self.pipeline_id)) - - -class SourceNodes(BaseModel): - id: str - metadata: Dict[str, Any] - score: Optional[float] - text: str - url: Optional[str] - - @classmethod - def from_source_node(cls, source_node: NodeWithScore): - metadata = source_node.node.metadata - url = cls.get_url_from_metadata(metadata) - #text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id - text = source_node.node.text - return cls( - id=source_node.node.node_id, - metadata=metadata, - score=source_node.score, - text=text, # type: ignore - url=url, - ) - - @classmethod - def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> str: - url_prefix = os.getenv("FILESERVER_URL_PREFIX") - if not url_prefix: - logger.warning( - "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server" - ) - file_name = metadata.get("file_name") - if file_name and url_prefix: - # file_name exists and file server is configured - pipeline_id = metadata.get("pipeline_id") - if pipeline_id and metadata.get("private") is None: - # file is from LlamaCloud and was not ingested locally - file_name = f"{pipeline_id}${file_name}" - return f"{url_prefix}/output/llamacloud/{file_name}" - is_private = metadata.get("private", "false") == "true" - if is_private: - return f"{url_prefix}/output/uploaded/{file_name}" - return f"{url_prefix}/data/{file_name}" - else: - # fallback to URL in metadata (e.g. for websites) - return metadata.get("URL") - - @classmethod - def from_source_nodes(cls, source_nodes: List[NodeWithScore]): - return [cls.from_source_node(node) for node in source_nodes] - - @staticmethod - def get_download_files(nodes: List[NodeWithScore]) -> Set[LlamaCloudFile]: - source_nodes = SourceNodes.from_source_nodes(nodes) - llama_cloud_files = [ - LlamaCloudFile( - file_name=node.metadata.get("file_name"), - pipeline_id=node.metadata.get("pipeline_id"), - ) - for node in source_nodes - if ( - node.metadata.get("private") - is None # Only download files are from LlamaCloud and were not ingested locally - and node.metadata.get("pipeline_id") is not None - and node.metadata.get("file_name") is not None - ) - ] - # Remove duplicates and return - return set(llama_cloud_files) - - -class Result(BaseModel): - result: Message - nodes: List[SourceNodes] - - -class ChatConfig(BaseModel): - starter_questions: Optional[List[str]] = Field( - default=None, - description="List of starter questions", - serialization_alias="starterQuestions", - ) - - class Config: - json_schema_extra = { - "example": { - "starterQuestions": [ - "What standards for letters exist?", - "What are the requirements for a letter to be considered a letter?", - ] - } - } diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本_upload.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本_upload.py deleted file mode 100644 index 94f3ce7..0000000 --- a/backend/app/api/routers/18æ—¶33分49ç§’_副本_upload.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging -from typing import List - -from fastapi import APIRouter, HTTPException -from pydantic import BaseModel - -from app.api.services.file import PrivateFileService - -file_upload_router = r = APIRouter() - -logger = logging.getLogger("uvicorn") - - -class FileUploadRequest(BaseModel): - base64: str - - -@r.post("") -def upload_file(request: FileUploadRequest) -> List[str]: - try: - logger.info("Processing file") - return PrivateFileService.process_file(request.base64) - except Exception as e: - logger.error(f"Error processing file: {e}", exc_info=True) - raise HTTPException(status_code=500, detail="Error processing file") diff --git a/backend/app/api/routers/18æ—¶33分49ç§’_副本_vercel_response.py b/backend/app/api/routers/18æ—¶33分49ç§’_副本_vercel_response.py deleted file mode 100644 index 0222a14..0000000 --- a/backend/app/api/routers/18æ—¶33分49ç§’_副本_vercel_response.py +++ /dev/null @@ -1,109 +0,0 @@ -import json - -from aiostream import stream -from fastapi import Request -from fastapi.responses import StreamingResponse -from llama_index.core.chat_engine.types import StreamingAgentChatResponse - -from app.api.routers.events import EventCallbackHandler -from app.api.routers.models import ChatData, Message, SourceNodes -from app.api.services.suggestion import NextQuestionSuggestion - - -class VercelStreamResponse(StreamingResponse): - """ - Class to convert the response from the chat engine to the streaming format expected by Vercel - """ - - TEXT_PREFIX = "0:" - DATA_PREFIX = "8:" - - @classmethod - def convert_text(cls, token: str): - # Escape newlines and double quotes to avoid breaking the stream - token = json.dumps(token) - return f"{cls.TEXT_PREFIX}{token}\n" - - @classmethod - def convert_data(cls, data: dict): - data_str = json.dumps(data) - return f"{cls.DATA_PREFIX}[{data_str}]\n" - - def __init__( - self, - request: Request, - event_handler: EventCallbackHandler, - response: StreamingAgentChatResponse, - chat_data: ChatData, - ): - content = VercelStreamResponse.content_generator( - request, event_handler, response, chat_data - ) - super().__init__(content=content) - - @classmethod - async def content_generator( - cls, - request: Request, - event_handler: EventCallbackHandler, - response: StreamingAgentChatResponse, - chat_data: ChatData, - ): - # Yield the text response - async def _chat_response_generator(): - final_response = "" - async for token in response.async_response_gen(): - final_response += token - yield VercelStreamResponse.convert_text(token) - - # Generate questions that user might interested to - conversation = chat_data.messages + [ - Message(role="assistant", content=final_response) - ] - questions = await NextQuestionSuggestion.suggest_next_questions( - conversation - ) - if len(questions) > 0: - yield VercelStreamResponse.convert_data( - { - "type": "suggested_questions", - "data": questions, - } - ) - - # the text_generator is the leading stream, once it's finished, also finish the event stream - event_handler.is_done = True - - # Yield the source nodes - yield cls.convert_data( - { - "type": "sources", - "data": { - "nodes": [ - SourceNodes.from_source_node(node).dict() - for node in response.source_nodes - ] - }, - } - ) - - # Yield the events from the event handler - async def _event_generator(): - async for event in event_handler.async_event_gen(): - event_response = event.to_response() - if event_response is not None: - yield VercelStreamResponse.convert_data(event_response) - - combine = stream.merge(_chat_response_generator(), _event_generator()) - is_stream_started = False - async with combine.stream() as streamer: - async for output in streamer: - if not is_stream_started: - is_stream_started = True - # Stream a blank message to start the stream - yield VercelStreamResponse.convert_text("") - - yield output - - if await request.is_disconnected(): - break diff --git a/backend/app/api/services/18æ—¶33分49ç§’_副本_file.py b/backend/app/api/services/18æ—¶33分49ç§’_副本_file.py deleted file mode 100644 index a478570..0000000 --- a/backend/app/api/services/18æ—¶33分49ç§’_副本_file.py +++ /dev/null @@ -1,113 +0,0 @@ -import base64 -import mimetypes -import os -from pathlib import Path -from typing import Dict, List -from uuid import uuid4 - -from app.engine.index import get_index -from llama_index.core import VectorStoreIndex -from llama_index.core.ingestion import IngestionPipeline -from llama_index.core.readers.file.base import ( - _try_loading_included_file_formats as get_file_loaders_map, -) -from llama_index.core.readers.file.base import ( - default_file_metadata_func, -) -from llama_index.core.schema import Document -from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex -from llama_index.readers.file import FlatReader - - -def get_llamaparse_parser(): - from app.engine.loaders import load_configs - from app.engine.loaders.file import FileLoaderConfig, llama_parse_parser - - config = load_configs() - file_loader_config = FileLoaderConfig(**config["file"]) - if file_loader_config.use_llama_parse: - return llama_parse_parser() - else: - return None - - -def default_file_loaders_map(): - default_loaders = get_file_loaders_map() - default_loaders[".txt"] = FlatReader - return default_loaders - - -class PrivateFileService: - PRIVATE_STORE_PATH = "output/uploaded" - - @staticmethod - def preprocess_base64_file(base64_content: str) -> tuple: - header, data = base64_content.split(",", 1) - mime_type = header.split(";")[0].split(":", 1)[1] - extension = mimetypes.guess_extension(mime_type) - # File data as bytes - return base64.b64decode(data), extension - - @staticmethod - def store_and_parse_file(file_data, extension) -> List[Document]: - # Store file to the private directory - os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True) - - # random file name - file_name = f"{uuid4().hex}{extension}" - file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name)) - - # write file - with open(file_path, "wb") as f: - f.write(file_data) - - # Load file to documents - # If LlamaParse is enabled, use it to parse the file - # Otherwise, use the default file loaders - reader = get_llamaparse_parser() - if reader is None: - reader_cls = default_file_loaders_map().get(extension) - if reader_cls is None: - raise ValueError(f"File extension {extension} is not supported") - reader = reader_cls() - documents = reader.load_data(file_path) - # Add custom metadata - for doc in documents: - doc.metadata["file_name"] = file_name - doc.metadata["private"] = "true" - return documents - - @staticmethod - def process_file(base64_content: str) -> List[str]: - file_data, extension = PrivateFileService.preprocess_base64_file(base64_content) - documents = PrivateFileService.store_and_parse_file(file_data, extension) - - # Only process nodes, no store the index - pipeline = IngestionPipeline() - nodes = pipeline.run(documents=documents) - - # Add the nodes to the index and persist it - current_index = get_index() - - # Insert the documents into the index - if isinstance(current_index, LlamaCloudIndex): - # LlamaCloudIndex is a managed index so we don't need to process the nodes - # just insert the documents - for doc in documents: - current_index.insert(doc) - else: - # Only process nodes, no store the index - pipeline = IngestionPipeline() - nodes = pipeline.run(documents=documents) - - # Add the nodes to the index and persist it - if current_index is None: - current_index = VectorStoreIndex(nodes=nodes) - else: - current_index.insert_nodes(nodes=nodes) - current_index.storage_context.persist( - persist_dir=os.environ.get("STORAGE_DIR", "storage") - ) - - # Return the document ids - return [doc.doc_id for doc in documents] diff --git a/backend/app/api/services/18æ—¶33分49ç§’_副本_llama_cloud.py b/backend/app/api/services/18æ—¶33分49ç§’_副本_llama_cloud.py deleted file mode 100644 index 852ae7c..0000000 --- a/backend/app/api/services/18æ—¶33分49ç§’_副本_llama_cloud.py +++ /dev/null @@ -1,114 +0,0 @@ -import logging -import os -from typing import Any, Dict, List, Optional - -import requests -from app.api.routers.models import LlamaCloudFile - -logger = logging.getLogger("uvicorn") - - -class LLamaCloudFileService: - LLAMA_CLOUD_URL = "https://cloud.llamaindex.ai/api/v1" - LOCAL_STORE_PATH = "output/llamacloud" - - DOWNLOAD_FILE_NAME_TPL = "{pipeline_id}${filename}" - - @classmethod - def get_all_projects(cls) -> List[Dict[str, Any]]: - url = f"{cls.LLAMA_CLOUD_URL}/projects" - return cls._make_request(url) - - @classmethod - def get_all_pipelines(cls) -> List[Dict[str, Any]]: - url = f"{cls.LLAMA_CLOUD_URL}/pipelines" - return cls._make_request(url) - - @classmethod - def get_all_projects_with_pipelines(cls) -> List[Dict[str, Any]]: - try: - projects = cls.get_all_projects() - pipelines = cls.get_all_pipelines() - return [ - { - **project, - "pipelines": [p for p in pipelines if p["project_id"] == project["id"]], - } - for project in projects - ] - except Exception as error: - logger.error(f"Error listing projects and pipelines: {error}") - return [] - - @classmethod - def _get_files(cls, pipeline_id: str) -> List[Dict[str, Any]]: - url = f"{cls.LLAMA_CLOUD_URL}/pipelines/{pipeline_id}/files" - return cls._make_request(url) - - @classmethod - def _get_file_detail(cls, project_id: str, file_id: str) -> Dict[str, Any]: - url = f"{cls.LLAMA_CLOUD_URL}/files/{file_id}/content?project_id={project_id}" - return cls._make_request(url) - - @classmethod - def _download_file(cls, url: str, local_file_path: str): - logger.info(f"Downloading file to {local_file_path}") - # Create directory if it doesn't exist - os.makedirs(cls.LOCAL_STORE_PATH, exist_ok=True) - # Download the file - with requests.get(url, stream=True) as r: - r.raise_for_status() - with open(local_file_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - logger.info("File downloaded successfully") - - @classmethod - def download_llamacloud_pipeline_file( - cls, - file: LlamaCloudFile, - force_download: bool = False, - ): - file_name = file.file_name - pipeline_id = file.pipeline_id - - # Check is the file already exists - downloaded_file_path = cls.get_file_path(file_name, pipeline_id) - if os.path.exists(downloaded_file_path) and not force_download: - logger.debug(f"File {file_name} already exists in local storage") - return - try: - logger.info(f"Downloading file {file_name} for pipeline {pipeline_id}") - files = cls._get_files(pipeline_id) - if not files or not isinstance(files, list): - raise Exception("No files found in LlamaCloud") - for file_entry in files: - if file_entry["name"] == file_name: - file_id = file_entry["file_id"] - project_id = file_entry["project_id"] - file_detail = cls._get_file_detail(project_id, file_id) - cls._download_file(file_detail["url"], downloaded_file_path) - break - except Exception as error: - logger.info(f"Error fetching file from LlamaCloud: {error}") - - @classmethod - def get_file_name(cls, name: str, pipeline_id: str) -> str: - return cls.DOWNLOAD_FILE_NAME_TPL.format(pipeline_id=pipeline_id, filename=name) - - @classmethod - def get_file_path(cls, name: str, pipeline_id: str) -> str: - return os.path.join(cls.LOCAL_STORE_PATH, cls.get_file_name(name, pipeline_id)) - - @staticmethod - def _make_request( - url: str, data=None, headers: Optional[Dict] = None, method: str = "get" - ): - if headers is None: - headers = { - "Accept": "application/json", - "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}', - } - response = requests.request(method, url, headers=headers, data=data) - response.raise_for_status() - return response.json() diff --git a/backend/app/api/services/18æ—¶33分49ç§’_副本_suggestion.py b/backend/app/api/services/18æ—¶33分49ç§’_副本_suggestion.py deleted file mode 100644 index 7017307..0000000 --- a/backend/app/api/services/18æ—¶33分49ç§’_副本_suggestion.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import List - -from app.api.routers.models import Message -from llama_index.core.prompts import PromptTemplate -from llama_index.core.settings import Settings -from pydantic import BaseModel - -NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate( - "你是一个ä¹äºŽåŠ©äººçš„åŠ©æ‰‹ï¼ä½ çš„任务是对用户å¯èƒ½ä¼šé—®çš„下一个问题给出建议。 " - "\n这是对è¯åކå²è®°å½•" - "\n---------------------\n{conversation}\n---------------------" - "考虑到对è¯åކå²è®°å½•,仅é™äºŽçŽ°åœ¨çŸ¥è¯†åº“å·²æœ‰å†…å®¹, 请给我 $number_of_questions 个你接下æ¥å¯èƒ½ä¼šé—®é¢˜çš„问题ï¼" -) -N_QUESTION_TO_GENERATE = 3 - - -class NextQuestions(BaseModel): - """A list of questions that user might ask next""" - - questions: List[str] - - -class NextQuestionSuggestion: - @staticmethod - async def suggest_next_questions( - messages: List[Message], - number_of_questions: int = N_QUESTION_TO_GENERATE, - ) -> List[str]: - # Reduce the cost by only using the last two messages - last_user_message = None - last_assistant_message = None - for message in reversed(messages): - if message.role == "user": - last_user_message = f"User: {message.content}" - elif message.role == "assistant": - last_assistant_message = f"Assistant: {message.content}" - if last_user_message and last_assistant_message: - break - conversation: str = f"{last_user_message}\n{last_assistant_message}" - - output: NextQuestions = await Settings.llm.astructured_predict( - NextQuestions, - prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT, - conversation=conversation, - nun_questions=number_of_questions, - ) - - return output.questions diff --git a/backend/app/engine/18æ—¶33分46ç§’_副本_index.py b/backend/app/engine/18æ—¶33分46ç§’_副本_index.py deleted file mode 100644 index b21e695..0000000 --- a/backend/app/engine/18æ—¶33分46ç§’_副本_index.py +++ /dev/null @@ -1,22 +0,0 @@ -import logging -from llama_index.core.indices import VectorStoreIndex -from app.engine.vectordb import get_vector_store - - -logger = logging.getLogger("uvicorn") - -index = None - -def get_index(params=None): - global index - if index is None: - logger.info("Connecting vector store...") - - store = get_vector_store() - # Load the index from the vector store - # If you are using a vector store that doesn't store text, - # you must load the index from both the vector store and the document store - index = VectorStoreIndex.from_vector_store(store) - logger.info("Finished load index from vector store.") - - return index diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本___init__.py b/backend/app/engine/18æ—¶33分47ç§’_副本___init__.py deleted file mode 100644 index 6e2a97a..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本___init__.py +++ /dev/null @@ -1,61 +0,0 @@ -import os - -from llama_index.core.agent import AgentRunner, ReActChatFormatter -from llama_index.core.settings import Settings -from llama_index.core.tools.query_engine import QueryEngineTool - -from app.engine.engine import create_query_engine, create_summary_query_engine -from app.engine.index import get_index -#from app.engine.loaders.db import makeDescriptionByEngine -from app.engine.tools import ToolFactory - - -def get_chat_engine(filters=None, params=None): - system_prompt = os.getenv("SYSTEM_PROMPT") - top_k = int(os.getenv("TOP_K", "3")) - use_reranker = os.getenv("RERANK_ENABLED") - tools = [] - - # 创建SQL查询工具 -# sql_query_engine = create_summary_query_engine(index) - # sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine, - # name="zjdata_query_tool", - # description="æ¥æºäºŽä¸€ä¸ªç”±åšå¾®å…¬å¸ç”µåŠ›é€ ä»·è½¯ä»¶ç¼–åˆ¶çš„é€ ä»·å·¥ç¨‹æ–‡ä»¶ã€‚è¯¥æ–‡ä»¶ä»¥å¤šå¼ è¡¨æ ¼çš„å½¢å¼å­˜å‚¨å­˜å‚¨äº†æ•´ä¸ªå·¥ç¨‹çš„全部数æ®å†…å®¹ã€‚é€‚ç”¨äºŽä»¥è¯¦ç»†çš„è‡ªç„¶è¯­è¨€æŸ¥è¯¢è¡¨æ ¼æ•°æ®æ–¹å¼æŸ¥è¯¢é€ ä»·å·¥ç¨‹å„项具体属性ã€è´¹ç”¨çš„æ•°å€¼ã€‚请先使用“zj_query_toolâ€æ— æ³•解决æ‰ä½¿ç”¨æœ¬å·¥å…·" - # ) - #tools.append(sql_query_tool) - - # Add query tool if index exists - index = get_index() - if index is not None: - summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters) - summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool", - description="适用于任何需è¦è¿›è¡Œå…¨é¢æ€»ç»“ã€æ¦‚æ‹¬çš„è¦æ±‚。", - ) - query_engine = create_query_engine(index,top_k,use_reranker,filters) - query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool", - description="ç”±åšå¾®å…¬å¸ç¼–制的关于电力造价知识ã€ç”µåŠ›é€ ä»·ç¼–åˆ¶è½¯ä»¶çŸ¥è¯†å’Œé€ ä»·å·¥ç¨‹æ–‡ä»¶ç»“æž„çš„çŸ¥è¯†åº“ã€‚é€‚ç”¨äºŽæŸ¥è¯¢ç”µåŠ›é¢†åŸŸã€ç”µåŠ›é€ ä»·é¢†åŸŸã€åšå¾®ã€åšå¾®ç”µåŠ›ã€åšå¾®é€ ä»·ç­‰ä¸šåŠ¡ç­‰å†…å®¹ã€‚å¦‚æžœæœ¬çŸ¥è¯†åº“æ²¡æœ‰ç›´æŽ¥ç­”æ¡ˆä½†æœ‰è§£å†³æ€è·¯çš„å¯ä»¥è¿”回解决办法åŽå»ºè®®ä½¿ç”¨â€œzjdata_query_toolâ€å·¥å…·ã€‚", - ) - - tools.append(summary_query_tool) - tools.append(query_engine_tool) - - # Add additional tools - tools += ToolFactory.from_env() - - prefix_messages = ("""您的设计旨在帮助完æˆå„ç§ä»»åŠ¡ï¼Œä»Žå›žç­”é—®é¢˜åˆ°æä¾›å…¶ä»–类型分æžçš„æ‘˜è¦ã€‚\n\n##工具\n\nä½ å¯ä»¥è®¿é—®å„ç§å·¥å…·ã€‚你有责任按照你认为åˆé€‚的顺åºä½¿ç”¨è¿™äº›å·¥å…·æ¥å®Œæˆå½“å‰çš„任务。\nè¿™å¯èƒ½éœ€è¦å°†ä»»åŠ¡åˆ†è§£ä¸ºå­ä»»åŠ¡ï¼Œå¹¶ä½¿ç”¨ä¸åŒçš„工具æ¥å®Œæˆæ¯ä¸ªå­ä»»åŠ¡ã€‚\n\nä½ å¯ä»¥è®¿é—®ä»¥ä¸‹å·¥å…·ï¼š\n{tool_desc}\n\n\n##输出格å¼\n\n请用与问题相åŒçš„语言回答,并使用以下格å¼ï¼š\n\n \nThought: 用户当å‰çš„语言是:(user's language)。我需è¦ä½¿ç”¨å·¥å…·æ¥å¸®åŠ©æˆ‘å›žç­”é—®é¢˜ã€‚\nAction: 如果使用工具,则为工具åç§°(one of {tool_names})。\nAction Input: 输入给工具的内容,使用JSONæ ¼å¼è¡¨ç¤ºkwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}})\n \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdownä»£ç æ ‡è®°åŒ…å›´ä½ çš„å“应。如果需è¦ï¼Œå¯ä»¥åœ¨å“åº”ä¸­ä½¿ç”¨ä»£ç æ ‡è®°ã€‚\n\n请为Action Input使用有效的JSONæ ¼å¼ã€‚ä¸è¦è¿™æ ·åš{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格å¼ï¼Œç”¨æˆ·å°†ä»¥ä¸‹é¢çš„æ ¼å¼è¿›è¡Œå›žåº”:\n\n \nObservation: 工具å“应\n \n\n你应该继续é‡å¤ä¸Šè¿°æ ¼å¼ï¼Œç›´åˆ°ä½ æœ‰è¶³å¤Ÿçš„ä¿¡æ¯æ¥å›žç­”é—®é¢˜è€Œæ— éœ€ä½¿ç”¨æ›´å¤šå·¥å…·ã€‚æ­¤æ—¶ï¼Œä½ å¿…é¡»ä½¿ç”¨ä»¥ä¸‹ä¸¤ç§æ ¼å¼ä¹‹ä¸€è¿›è¡Œå›žç­”:\n\n \nThought: 我å¯ä»¥ä¸ç”¨ä»»ä½•工具æ¥å›žç­”。我将使用用户的语言æ¥å›žç­”。\nAnswer: [你的答案(与用户问题相åŒçš„语言)]\n \n\n \nThought: 我无法使用æä¾›çš„工具回答问题。\nAnswer: [你的答案(与用户问题相åŒçš„语言)]\n \n\n##如果从工具中得到的回应是Empty Response,那么åªéœ€è¦å›žç­”“我ä¸çŸ¥é“â€ï¼Œä¸éœ€è¦é¢å¤–回答别的内容。## 当å‰å¯¹è¯\n\n以下是当å‰å¯¹è¯ï¼Œç”±äººç±»å’ŒåŠ©æ‰‹çš„æ¶ˆæ¯äº¤æ›¿ç»„æˆã€‚\n""") - react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages) - agentrunner = AgentRunner.from_llm( - llm=Settings.llm, - tools=tools, - react_chat_formatter=react_chat_formatter, - system_prompt=system_prompt, - verbose=True, - ) - return agentrunner - # create the function calling worker for reasoning - # worker = FunctionCallingAgentWorker.from_tools( - # tools, verbose=True - # ) - # - # # wrap the worker in the top-level planner - # return StructuredPlannerAgent(worker, tools) diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本_constants.py b/backend/app/engine/18æ—¶33分47ç§’_副本_constants.py deleted file mode 100644 index bd93bb8..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本_constants.py +++ /dev/null @@ -1 +0,0 @@ -STORAGE_DIR = "storage" # directory to cache the generated index \ No newline at end of file diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本_engine.py b/backend/app/engine/18æ—¶33分47ç§’_副本_engine.py deleted file mode 100644 index 379275e..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本_engine.py +++ /dev/null @@ -1,108 +0,0 @@ -import os - -from llama_index.core import SummaryIndex, SQLDatabase, VectorStoreIndex -from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine -from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema -from llama_index.core.query_engine import RetrieverQueryEngine -from llama_index.core.response_synthesizers import ResponseMode -from llama_index.readers.database import DatabaseReader -from sqlalchemy import create_engine - -from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template -from app.engine.retriever.HybridRetriever import HybridRetriever -from app.settings import get_node_postprocessors - -def makeDescriptionByEngine(sql_database:SQLDatabase): - reader = DatabaseReader(sql_database) - - table_names = sql_database.get_usable_table_names() - table_schema_objs = [] - for table_name in table_names: - columns = sql_database.get_table_columns(table_name) - if len(columns) > 150: - continue - stats_txt = "" - - if table_name == 'gongchengshuxing': - stats_txt = '该表中有以下属性:' - documents = reader.load_data(query='select name from gongchengshuxing') - for index in range(len(documents) if len(documents) < 30 else 30): - if index == 0: - continue - elif index > 1: - stats_txt += ',' - stats_txt += documents[index].text.split(':')[1] - - tbSchema = (SQLTableSchema(table_name=table_name, context_str=stats_txt)) - table_schema_objs.append(tbSchema) - - return table_schema_objs - -def get_Retriever(index,**kwargs): - strEnableHybrid = os.getenv("HYBRID_ENABLED",'False') - bEnableHybrid = True if strEnableHybrid is not None and strEnableHybrid.title() == 'True' else False - if bEnableHybrid: - alpha = float(os.getenv("HYBRID_ALPHA", "0.5")) - retriever = HybridRetriever(index,alpha = alpha,**kwargs) - else: - retriever = index.as_retriever(**kwargs) - return retriever - - -sql_database = None -sql_obj_index = None - -# Create a summary query engine -def create_summary_query_engine(top_k=3, use_reranker=False, filters=None): - global sql_obj_index - global sql_database - if sql_obj_index is None or sql_database is None: - sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", "")) - sql_database = SQLDatabase(sqlengine) - table_schema_objs = makeDescriptionByEngine(sql_database) - table_node_mapping = SQLTableNodeMapping(sql_database) - - sql_obj_index = ObjectIndex.from_objects( - table_schema_objs, - table_node_mapping, - index_cls=VectorStoreIndex, - ) - - # 创建SQL查询工具 - sql_query_engine = SQLTableRetrieverQueryEngine(sql_database, - sql_obj_index.as_retriever(similarity_top_k=top_k), - verbose=True, - ) - return sql_query_engine - -# Create a summary query engine -def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None): - summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None)) - summary_query_engine = summary_index.as_query_engine( - response_mode=ResponseMode.TREE_SUMMARIZE, - use_async=True, - streaming=True, - ) - return summary_query_engine - -# Create a query engine -def create_query_engine(index, top_k=3, use_reranker=False, filters=None): - # 创建å‘釿£€ç´¢æŸ¥è¯¢å·¥å…· - postprocess = None - if use_reranker: - postprocess = get_node_postprocessors() - - query_engine = RetrieverQueryEngine.from_args( - get_Retriever(index, - similarity_top_k=top_k, - filters=filters), - text_qa_template=text_qa_template, - refine_template=refine_template, - summary_template = summary_template, - simple_template = simple_template, - node_postprocessors=postprocess, - use_async=True, - streaming=True, - ) - - return query_engine \ No newline at end of file diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本_generate.py b/backend/app/engine/18æ—¶33分47ç§’_副本_generate.py deleted file mode 100644 index 87ecfa1..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本_generate.py +++ /dev/null @@ -1,94 +0,0 @@ -from dotenv import load_dotenv - -load_dotenv() - -import logging -import os - -from app.engine.loaders import get_documents -from app.engine.vectordb import get_vector_store -from app.settings import init_settings -from app.engine.retriever.CHBM25Retriever import CHBM25Retriever -from llama_index.core.ingestion import IngestionPipeline -from llama_index.core.node_parser import SentenceSplitter -from llama_index.core.settings import Settings -from llama_index.core.storage import StorageContext -from llama_index.core.storage.docstore import SimpleDocumentStore - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger() - -STORAGE_DIR = os.getenv("STORAGE_DIR", "storage") - - -def get_doc_store(): - - # If the storage directory is there, load the document store from it. - # If not, set up an in-memory document store since we can't load from a directory that doesn't exist. - if os.path.exists(STORAGE_DIR): - return SimpleDocumentStore.from_persist_dir(STORAGE_DIR) - else: - return SimpleDocumentStore() - - -def run_pipeline(docstore, vector_store, documents): - pipeline = IngestionPipeline( - transformations=[ - SentenceSplitter( - chunk_size=Settings.chunk_size, - chunk_overlap=Settings.chunk_overlap, - ), - Settings.embed_model, - ], - docstore=docstore, - docstore_strategy="upserts_and_delete", - vector_store=vector_store, - ) - - # Run the ingestion pipeline and store the results - nodes = pipeline.run(show_progress=True, documents=documents) - - return nodes - - -def persist_storage(docstore, vector_store): - storage_context = StorageContext.from_defaults( - docstore=docstore, - vector_store=vector_store, - ) - storage_context.persist(STORAGE_DIR) - - -def persist_BMRetriever(vector_store): - STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm") - top_k = int(os.getenv("TOP_K", "3")) - bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([])) - bmRetriver.persist(STORAGE_DIR) - - -def generate_datasource(): - init_settings() - logger.info("Generate index for the provided data") - - # Get the stores and documents or create new ones - documents = get_documents() - # Set private=false to mark the document as public (required for filtering) - for doc in documents: - doc.metadata["private"] = "false" - docstore = get_doc_store() - vector_store = get_vector_store() - - # Run the ingestion pipeline - _ = run_pipeline(docstore, vector_store, documents) - - # Build the index and persist storage - persist_storage(docstore, vector_store) - persist_BMRetriever(vector_store) - - logger.info("Finished generating the index") - - -if __name__ == "__main__": - from phoenix.trace import using_project - with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj: - generate_datasource() diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本_prompt.py b/backend/app/engine/18æ—¶33分47ç§’_副本_prompt.py deleted file mode 100644 index 29a2283..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本_prompt.py +++ /dev/null @@ -1,93 +0,0 @@ -from llama_index.core import PromptTemplate - -text_qa_template_str = ( - "# 角色\n" - "你是一ååšå¾®é€ ä»·å·¥ç¨‹æ•°æ®æŸ¥è¯¢åŠ©æ‰‹ï¼Œä¸“ç²¾äºŽç”µåŠ›å·¥ç¨‹æ–‡ä»¶ä¸­çš„ä¿¡æ¯ã€‚" - "ä½ çš„èŒè´£æ˜¯æä¾›æœ‰å…³ç”µåЛ造价ã€é€ ä»·ç¼–åˆ¶è½¯ä»¶ã€æ–‡ä»¶ç»“æž„åŠç›¸å…³æ•°æ®çš„精准ã€å®¢è§‚的回答," - "如åŒç›´æŽ¥ä»Žæ–‡ä»¶ä¸­æå–的内容。\n" - "知识库中已ç»å¯¼å…¥ä¸€ä¸ªå·¥ç¨‹çš„全部数æ®ï¼Œè¯·ä½ ç«™åœ¨å½“å‰å·¥ç¨‹çš„角度回答用户关于工程文件的问题。\n" - "ä¾‹å¦‚ï¼šè¯¢é—®â€œæ­¤å·¥ç¨‹â€æŒ‡å½“å‰å¯¼å…¥çš„工程。询问“此工程åç§°â€æŒ‡å½“å‰å¯¼å…¥çš„工程的工程å称。\n" - - "## 技能\n" - "### 技能 1: æ•°æ®æŸ¥è¯¢ä¸Žæä¾›\n" - "- 准确回答所有关于电力工程造价的相关问题。\n" - "- æä¾›å…·ä½“æ•°æ®ï¼Œå¦‚æˆæœ¬ä¼°ç®—ã€ææ–™æ¸…å•ã€åŠ³åŠ¨åŠ›éœ€æ±‚ç­‰ã€‚\n" - "- ç¡®ä¿æä¾›çš„ä¿¡æ¯ä¸¥æ ¼åŸºäºŽå·¥ç¨‹æ–‡æ¡£ä¸­çš„记录。\n" - - "### 技能 2: 技术性解释\n" - "- 解释造价工程中的技术术语和概念。\n" - "- ä¸ºå¤æ‚的工程细节æä¾›æ¸…晰易懂的说明。\n" - - "## 约æŸ\n" - "- 仅回答与电力工程造价文件相关的具体问题。\n" - "- ä¸è¿›è¡Œä»»ä½•超出文件内容的猜测或å‡è®¾ã€‚\n" - "- 所有回答å‡åŸºäºŽæ–‡ä»¶å†…容,采用客观和技术性的语言。\n" - "- 请基于这些信æ¯å›žç­”问题。如果无法找到相关信æ¯ï¼Œè¯·ä¸è¦é¢å¤–呿•£å›žç­”,ä¸è¦å›žç­”多余的信æ¯ï¼Œåªéœ€è¦å›žç­”“我ä¸çŸ¥é“这个问题的答案â€ã€‚\n" - "以下为上下文信æ¯\n" - "---------------------\n" - "{context_str}\n" - "---------------------\n" - "请根æ®ä¸Šä¸‹æ–‡ä¿¡æ¯è€Œéžå…ˆå‰çŸ¥è¯†å›žç­”æˆ‘çš„é—®é¢˜æˆ–å›žå¤æˆ‘的指令。å‰é¢çš„上下文信æ¯å¯èƒ½æœ‰ç”¨ï¼Œä¹Ÿå¯èƒ½æ²¡ç”¨ï¼Œä½ éœ€è¦ä»Žæˆ‘给出的上下文信æ¯ä¸­é€‰å‡ºä¸Žæˆ‘的问题最相关的那些,æ¥ä¸ºä½ çš„回答æä¾›ä¾æ®ã€‚回答一定è¦å¿ äºŽåŽŸæ–‡ï¼Œç®€æ´ä½†ä¸ä¸¢ä¿¡æ¯ï¼Œä¸è¦èƒ¡ä¹±ç¼–造。如果无法找到相关信æ¯ï¼Œè¯·ä¸è¦é¢å¤–呿•£å›žç­”,ä¸è¦å›žç­”多余的信æ¯ï¼Œåªéœ€è¦å›žç­”“我ä¸çŸ¥é“这个问题的答案â€ã€‚我的问题或指令是什么语ç§ï¼Œä½ å°±ç”¨ä»€ä¹ˆè¯­ç§å›žå¤ã€‚\n" - "如果是表结构或者是数æ®åº“的相关内容,åªç”¨äºŽæŽ¨å¯¼é—®é¢˜ï¼Œä¸éœ€è¦å‘Šè¯‰ç”¨æˆ·æ•°æ®åº“或表结构等物ç†ä¿¡æ¯ã€‚\n" - - "问题:{query_str}\n" - "你的回å¤ï¼š " -) - - -text_qa_template = PromptTemplate(text_qa_template_str) - -refine_template_str = ( - "这是原本的问题: {query_str}\n" - "æˆ‘ä»¬å·²ç»æä¾›äº†å›žç­”: {existing_answer}\n" - "现在我们有机会改进这个回答 " - "使用以下更多上下文(仅当有助于改进回答时使用)\n" - "如果新的上下文对回答没有影å“,或者原æ¥çš„å›žç­”å·²ç»æ­£ç¡®ï¼Œä¸è¦åœ¨ä¸Šæ¬¡å›žç­”çš„åŽè¾¹å†åŠ ä¸Šå¤šä½™çš„è¡¥å……ä¿¡æ¯ï¼Œç›´æŽ¥è¿”回原本的回答。\n" - "如果新的上下文对回答没有影å“,或者原æ¥çš„å›žç­”å·²ç»æ­£ç¡®ï¼Œä¸è¦åœ¨ä¸Šæ¬¡å›žç­”çš„åŽè¾¹å†åŠ ä¸Šå¤šä½™çš„è¡¥å……ä¿¡æ¯ï¼Œç›´æŽ¥è¿”回原本的回答。\n" - "------------\n" - "{context_msg}\n" - "------------\n" - "如果回答中已ç»åŒ…嫿œ‰æ­£ç¡®ç­”案,ä¸è¦è¿”回多余的解释等信æ¯ï¼Œåªè¿”回正确答案\n" - "如果是表结构或者是数æ®åº“的相关内容,仅用于推导问题,ä¸éœ€è¦å‘Šè¯‰ç”¨æˆ·æ•°æ®åº“或表结构等物ç†ä¿¡æ¯ã€‚\n" - "改进的回答: " -) - -refine_template = PromptTemplate(refine_template_str) - -summary_template_str = ( - "# 角色\n" - "你是一ååšå¾®é€ ä»·å·¥ç¨‹æ•°æ®æŸ¥è¯¢åŠ©æ‰‹ï¼Œä¸“ç²¾äºŽç”µåŠ›å·¥ç¨‹æ–‡ä»¶ä¸­çš„ä¿¡æ¯ã€‚" - "ä½ çš„èŒè´£æ˜¯æä¾›æœ‰å…³ç”µåЛ造价ã€é€ ä»·ç¼–åˆ¶è½¯ä»¶ã€æ–‡ä»¶ç»“æž„åŠç›¸å…³æ•°æ®çš„精准ã€å®¢è§‚的回答," - "如åŒç›´æŽ¥ä»Žæ–‡ä»¶ä¸­æå–的内容。\n" - - "## 技能\n" - "### 技能 1: æ•°æ®æŸ¥è¯¢ä¸Žæä¾›\n" - "- 准确回答所有关于电力工程造价的相关问题。\n" - "- æä¾›å…·ä½“æ•°æ®ï¼Œå¦‚æˆæœ¬ä¼°ç®—ã€ææ–™æ¸…å•ã€åŠ³åŠ¨åŠ›éœ€æ±‚ç­‰ã€‚\n" - "- ç¡®ä¿æä¾›çš„ä¿¡æ¯ä¸¥æ ¼åŸºäºŽå·¥ç¨‹æ–‡æ¡£ä¸­çš„记录。\n" - - "### 技能 2: 技术性解释\n" - "- 解释造价工程中的技术术语和概念。\n" - "- ä¸ºå¤æ‚的工程细节æä¾›æ¸…晰易懂的说明。\n" - - "## 约æŸ\n" - "- 仅回答与电力工程造价文件相关的具体问题。\n" - "- ä¸è¿›è¡Œä»»ä½•超出文件内容的猜测或å‡è®¾ã€‚\n" - "- 所有回答å‡åŸºäºŽæ–‡ä»¶å†…容,采用客观和技术性的语言。\n" - "- 请基于这些信æ¯å›žç­”问题。如果无法找到相关信æ¯ï¼Œè¯·ä¸è¦é¢å¤–呿•£å›žç­”,ä¸è¦å›žç­”多余的信æ¯ï¼Œåªéœ€è¦å›žç­”“我ä¸çŸ¥é“这个问题的答案â€ã€‚\n" - "æ¥è‡ªå¤šä¸ªæ¥æºçš„上下文信æ¯å¦‚下。\n" - "---------------------\n" - "{context_str}\n" - "---------------------\n" - "鉴于æ¥è‡ªå¤šä¸ªæ¥æºçš„ä¿¡æ¯è€Œéžå…ˆéªŒçŸ¥è¯†ï¼Œ " - "回答查询。\n" - "如果是表结构或者是数æ®åº“的相关内容,åªç”¨äºŽæŽ¨å¯¼é—®é¢˜ï¼Œä¸éœ€è¦å‘Šè¯‰ç”¨æˆ·æ•°æ®åº“或表结构等物ç†ä¿¡æ¯ã€‚\n" - "Query: {query_str}\n" - "Answer: " -) -summary_template = PromptTemplate(summary_template_str) - -simple_template_str = ( - "{query_str}" -) -simple_template = PromptTemplate(simple_template_str) diff --git a/backend/app/engine/18æ—¶33分47ç§’_副本_vectordb.py b/backend/app/engine/18æ—¶33分47ç§’_副本_vectordb.py deleted file mode 100644 index f3f2a7d..0000000 --- a/backend/app/engine/18æ—¶33分47ç§’_副本_vectordb.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -from llama_index.vector_stores.chroma import ChromaVectorStore -from llama_index.vector_stores.qdrant import QdrantVectorStore -from qdrant_client import qdrant_client - -qclient = None - -def get_qdrant_vector_store(): - collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default") - vector_store_path = os.getenv("VECTOR_STORE_PATH") - host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"), - port=int(os.getenv("VECTOR_STORE_PORT", "6333")), - - if not vector_store_path or not host: - raise ValueError( - "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" - ) - # if VECTOR_STORE_PATH is set, use a local QdrantVectorStore from the path - # otherwise, use a remote QdrantVectorStore - global qclient - if qclient == None: - if vector_store_path: - qclient = qdrant_client.QdrantClient( - path=vector_store_path, - ) - else: - qclient = qdrant_client.QdrantClient( - host=host, - port=port, - ) - - vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name) - return vector_store - -def get_chroma_vector_store(): - collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default") - vector_store_path = os.getenv("VECTOR_STORE_PATH") - # if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path - # otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet) - if vector_store_path: - store = ChromaVectorStore.from_params( - persist_dir=vector_store_path, collection_name=collection_name, - collection_kwargs={"metadata":{"hnsw:space":"cosine"}}, - ) - else: - if not os.getenv("VECTOR_STORE_HOST") or not os.getenv("VECTOR_STORE_PORT"): - raise ValueError( - "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" - ) - store = ChromaVectorStore.from_params( - host=os.getenv("VECTOR_STORE_HOST"), - port=int(os.getenv("VECTOR_STORE_PORT")), - collection_name=collection_name, - collection_kwargs={"metadata":{"hnsw:space":"cosine"}}, - ) - return store - -def get_vector_store(): - store_type=os.getenv("VECTOR_STORE_TYPE") - - store = None - - match store_type: - case "chroma": - store = get_chroma_vector_store() - case "qdrant": - store = get_qdrant_vector_store() - case _: - raise ValueError(f"Invalid vector store type: {store_type}") - - return store \ No newline at end of file diff --git a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本___init__.py b/backend/app/engine/loaders/18æ—¶33分48ç§’_副本___init__.py deleted file mode 100644 index a220170..0000000 --- a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本___init__.py +++ /dev/null @@ -1,40 +0,0 @@ -import logging - -import yaml -from app.engine.loaders.db import DBLoaderConfig, get_db_documents -from app.engine.loaders.file import FileLoaderConfig, get_file_documents -from app.engine.loaders.web import WebLoaderConfig, get_web_documents - -logger = logging.getLogger(__name__) - - -def load_configs(): - with open("config/loaders.yaml") as f: - configs = yaml.safe_load(f) - return configs - - -def get_documents(): - documents = [] - config = load_configs() - if config is None or len(config.items()) == 0: - return documents - - for loader_type, loader_config in config.items(): - logger.info( - f"Loading documents from loader: {loader_type}, config: {loader_config}" - ) - - loader_config = loader_config or [] - match loader_type: - case "file": - document = get_file_documents(FileLoaderConfig(**loader_config)) - case "web": - document = get_web_documents(WebLoaderConfig(**loader_config)) - case "db": - document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) - case _: - raise ValueError(f"Invalid loader type: {loader_type}") - documents.extend(document) - - return documents diff --git a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_db.py b/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_db.py deleted file mode 100644 index 4be984d..0000000 --- a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_db.py +++ /dev/null @@ -1,140 +0,0 @@ -import logging -from typing import Any, List, Optional - -from llama_index.core import SQLDatabase, Document -from llama_index.readers.database import DatabaseReader -from pydantic import BaseModel -from sqlalchemy import create_engine, text -from sqlalchemy.engine import Engine - -logger = logging.getLogger(__name__) - -class CustomDatabaseReader(DatabaseReader): - """Simple Database reader. - - Concatenates each row into Document used by LlamaIndex. - - Args: - sql_database (Optional[SQLDatabase]): SQL database to use, - including table names to specify. - See :ref:`Ref-Struct-Store` for more details. - - OR - - engine (Optional[Engine]): SQLAlchemy Engine object of the database connection. - - OR - - uri (Optional[str]): uri of the database connection. - - OR - - scheme (Optional[str]): scheme of the database connection. - host (Optional[str]): host of the database connection. - port (Optional[int]): port of the database connection. - user (Optional[str]): user of the database connection. - password (Optional[str]): password of the database connection. - dbname (Optional[str]): dbname of the database connection. - - Returns: - DatabaseReader: A DatabaseReader object. - """ - - def __init__( - self, - sql_database: Optional[SQLDatabase] = None, - engine: Optional[Engine] = None, - uri: Optional[str] = None, - scheme: Optional[str] = None, - host: Optional[str] = None, - port: Optional[str] = None, - user: Optional[str] = None, - password: Optional[str] = None, - dbname: Optional[str] = None, - *args: Any, - **kwargs: Any, - ) -> None: - """Initialize with parameters.""" - if sql_database: - self.sql_database = sql_database - elif engine: - self.sql_database = SQLDatabase(engine, *args, **kwargs) - elif uri: - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - elif scheme and host and port and user and password and dbname: - uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}" - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - else: - raise ValueError( - "You must provide either a SQLDatabase, " - "a SQL Alchemy Engine, a valid connection URI, or a valid " - "set of credentials." - ) - - def load_data(self, query: str, explanation: str) -> List[Document]: - """Query and load data from the Database, returning a list of Documents. - - Args: - query (str): Query parameter to filter tables and rows. - explanation (str): Explanation for the query to be included in the document. - - Returns: - List[Document]: A list of Document objects. - """ - dco_str = explanation + "\n" - - with self.sql_database.engine.connect() as connection: - if query is None: - raise ValueError("A query parameter is necessary to filter the data") - else: - result = connection.execute(text(query)) - - dco_str += ", ".join( - [f"{entry}" for entry in result.keys()] - ) + "\n" - - for item in result.fetchall(): - # Fetch each item - record_str = ", ".join( - [f"{entry}" for col, entry in zip(result.keys(), item)] - ) - dco_str += record_str + "\n" - - doc = Document(text=dco_str) - doc.metadata["name"] = query - doc.metadata["context"] = query - doc.metadata["file_type"] = "application/vnd.ms-excel" - return [doc] - -class DBLoaderConfig(BaseModel): - uri: str - queries: List[dict] - -def get_db_documents(configs: list[DBLoaderConfig]): - docs = [] - - if len(configs) == 0 or configs[0].uri == "": - logger.warning( - f"Failed to load database, error message: uri is empty. Return as empty document list." - ) - return docs - - metadata = { - 'file_type': 'application/booway.document.zj', - } - - for entry in configs: - engine = create_engine(entry.uri) - sql_database = SQLDatabase(engine) - - loader = CustomDatabaseReader(sql_database) - for query_dict in entry.queries: - query = query_dict.get("sql", "") - explanation = query_dict.get("explanation", "") - logger.info(f"Loading data from database with query: {query}") - documents = loader.load_data(query=query, explanation=explanation) - - docs.extend(documents) - return docs diff --git a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_file.py b/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_file.py deleted file mode 100644 index dc199db..0000000 --- a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_file.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import logging -from typing import Dict - -from llama_index.core.readers.base import BaseReader -from llama_index.core.readers.json import JSONReader -from llama_parse import LlamaParse -from pydantic import BaseModel, validator - -logger = logging.getLogger(__name__) - - -class FileLoaderConfig(BaseModel): - data_dir: str = "data" - use_llama_parse: bool = False - - @validator("data_dir") - def data_dir_must_exist(cls, v): - if not os.path.isdir(v): - raise ValueError(f"Directory '{v}' does not exist") - return v - - -def llama_parse_parser(): - if os.getenv("LLAMA_CLOUD_API_KEY") is None: - raise ValueError( - "LLAMA_CLOUD_API_KEY environment variable is not set. " - "Please set it in .env file or in your shell environment then run again!" - ) - parser = LlamaParse( - result_type="markdown", - verbose=True, - language="en", - ignore_errors=False, - ) - return parser - - -def llama_parse_extractor() -> Dict[str, LlamaParse]: - from llama_parse.utils import SUPPORTED_FILE_TYPES - - parser = llama_parse_parser() - return {file_type: parser for file_type in SUPPORTED_FILE_TYPES} - -def llama_local_extractor() -> Dict[str, BaseReader]: - return {".json" : JSONReader(clean_json=False,levels_back=0)} - - -def get_file_documents(config: FileLoaderConfig): - from llama_index.core.readers import SimpleDirectoryReader - - try: - file_extractor = None - if config.use_llama_parse: - # LlamaParse is async first, - # so we need to use nest_asyncio to run it in sync mode - import nest_asyncio - - nest_asyncio.apply() - - file_extractor = llama_parse_extractor() - else: - file_extractor = llama_local_extractor() - - reader = SimpleDirectoryReader( - config.data_dir, - recursive=True, - filename_as_id=True, - raise_on_error=True, - file_extractor=file_extractor, - ) - return reader.load_data() - except Exception as e: - import sys - import traceback - - # Catch the error if the data dir is empty - # and return as empty document list - _, _, exc_traceback = sys.exc_info() - function_name = traceback.extract_tb(exc_traceback)[-1].name - if function_name == "_add_files": - logger.warning( - f"Failed to load file documents, error message: {e} . Return as empty document list." - ) - return [] - else: - # Raise the error if it is not the case of empty data dir - raise e diff --git a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_web.py b/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_web.py deleted file mode 100644 index e667a69..0000000 --- a/backend/app/engine/loaders/18æ—¶33分48ç§’_副本_web.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import json -from pydantic import BaseModel, Field - - -class CrawlUrl(BaseModel): - base_url: str - prefix: str - max_depth: int = Field(default=1, ge=0) - - -class WebLoaderConfig(BaseModel): - driver_arguments: list[str] = Field(default=None) - urls: list[CrawlUrl] = [] - - -def get_web_documents(config: WebLoaderConfig): - from llama_index.readers.web import WholeSiteReader - from selenium import webdriver - from selenium.webdriver.chrome.options import Options - - options = Options() - driver_arguments = config.driver_arguments or [] - for arg in driver_arguments: - options.add_argument(arg) - - docs = [] - urls = config.urls or [] - for url in config.urls: - scraper = WholeSiteReader( - prefix=url.prefix, - max_depth=url.max_depth, - driver=webdriver.Chrome(options=options), - ) - docs.extend(scraper.load_data(url.base_url)) - - return docs diff --git a/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHBM25Retriever.py b/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHBM25Retriever.py deleted file mode 100644 index fa5d5ec..0000000 --- a/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHBM25Retriever.py +++ /dev/null @@ -1,133 +0,0 @@ -import json -import logging -import os - -from typing import Any, Callable, Dict, List, Optional, cast - -from llama_index.core.base.base_retriever import BaseRetriever -from llama_index.core.callbacks.base import CallbackManager -from llama_index.core.constants import DEFAULT_SIMILARITY_TOP_K -from llama_index.core.indices.vector_store.base import VectorStoreIndex -from llama_index.core.schema import BaseNode, IndexNode, NodeWithScore, QueryBundle -from llama_index.core.storage.docstore.types import BaseDocumentStore -from llama_index.core.vector_stores.utils import ( - node_to_metadata_dict, - metadata_dict_to_node, -) - -import bm25s -from app.engine.retriever.CHTokener import chTokenize - -CHDEFAULT_PERSIST_ARGS = {"similarity_top_k": "similarity_top_k", "_verbose": "verbose"} - -CHDEFAULT_PERSIST_FILENAME = "retriever.json" - -class CHBM25Retriever(BaseRetriever): - def __init__( - self, - nodes: Optional[List[BaseNode]] = None, - existing_bm25: Optional[bm25s.BM25] = None, - similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K, - callback_manager: Optional[CallbackManager] = None, - objects: Optional[List[IndexNode]] = None, - object_map: Optional[dict] = None, - verbose: bool = False, - ) -> None: - self.similarity_top_k = similarity_top_k - if existing_bm25 is not None: - self.bm25 = existing_bm25 - self.corpus = existing_bm25.corpus - else: - from nltk.corpus import stopwords - if nodes is None: - raise ValueError("Please pass nodes or an existing BM25 object.") - - self.corpus = [node_to_metadata_dict(node) for node in nodes] - - corpus_tokens = chTokenize( - [node.get_content() for node in nodes], - show_progress=verbose, - ) - self.bm25 = bm25s.BM25() - self.bm25.index(corpus_tokens, show_progress=verbose) - super().__init__( - callback_manager=callback_manager, - object_map=object_map, - objects=objects, - verbose=verbose, - ) - - @classmethod - def from_defaults( - cls, - index: Optional[VectorStoreIndex] = None, - nodes: Optional[List[BaseNode]] = None, - docstore: Optional[BaseDocumentStore] = None, - similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K, - verbose: bool = False, - ) -> "CHBM25Retriever": - if sum(bool(val) for val in [index, nodes, docstore]) != 1: - raise ValueError("Please pass exactly one of index, nodes, or docstore.") - - if index is not None: - docstore = index.docstore - - if docstore is not None: - nodes = cast(List[BaseNode], list(docstore.docs.values())) - - assert ( - nodes is not None - ), "Please pass exactly one of index, nodes, or docstore." - - return cls( - nodes=nodes, - similarity_top_k=similarity_top_k, - verbose=verbose, - ) - - def get_persist_args(self) -> Dict[str, Any]: - """Get Persist Args Dict to Save.""" - return { - CHDEFAULT_PERSIST_ARGS[key]: getattr(self, key) - for key in CHDEFAULT_PERSIST_ARGS - if hasattr(self, key) - } - - def persist(self, path: str, **kwargs: Any) -> None: - """Persist the retriever to a directory.""" - self.bm25.save(path, corpus=self.corpus, **kwargs) - with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME), "w") as f: - json.dump(self.get_persist_args(), f, indent=2) - - @classmethod - def from_persist_dir(cls, path: str, **kwargs: Any) -> "CHBM25Retriever": - """Load the retriever from a directory.""" - bm25 = bm25s.BM25.load(path, load_corpus=True, **kwargs) - with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME)) as f: - retriever_data = json.load(f) - return cls(existing_bm25=bm25, **retriever_data) - - def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: - query = query_bundle.query_str - tokenized_query = chTokenize( - query,show_progress=self._verbose - ) - indexes, scores = self.bm25.retrieve( - tokenized_query, k=self.similarity_top_k, show_progress=self._verbose - ) - - # batched, but only one query - indexes = indexes[0] - scores = scores[0] - - nodes: List[NodeWithScore] = [] - for idx, score in zip(indexes, scores): - # idx can be an int or a dict of the node - if isinstance(idx, dict): - node = metadata_dict_to_node(idx) - else: - node_dict = self.corpus[int(idx)] - node = metadata_dict_to_node(node_dict) - nodes.append(NodeWithScore(node=node, score=float(score))) - - return nodes \ No newline at end of file diff --git a/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHTokener.py b/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHTokener.py deleted file mode 100644 index 9c5a071..0000000 --- a/backend/app/engine/retriever/18æ—¶33分48ç§’_副本_CHTokener.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Any, Dict, List, Union, Callable, NamedTuple -from bm25s.tokenization import * - -try: - from tqdm.auto import tqdm -except ImportError: - - def tqdm(iterable, *args, **kwargs): - return iterable - - -def chinese_tokenizer(text: str) -> List[str]: - import jieba - from nltk.corpus import stopwords - tokens = jieba.lcut(text) - return [token for token in tokens if token not in stopwords.words('chinese')] - -def chTokenize( - texts, - show_progress: bool = True, - leave: bool = False, -) -> Union[List[List[str]], Tokenized]: - if isinstance(texts, str): - texts = [texts] - - corpus_ids = [] - token_to_index = {} - - for text in tqdm( - texts, desc="Split strings", leave=leave, disable=not show_progress - ): - - splitted = chinese_tokenizer(text) - doc_ids = [] - - for token in splitted: - if token not in token_to_index: - token_to_index[token] = len(token_to_index) - - token_id = token_to_index[token] - doc_ids.append(token_id) - - corpus_ids.append(doc_ids) - - return Tokenized(ids=corpus_ids, vocab=token_to_index) - diff --git a/backend/app/engine/retriever/18æ—¶33分49ç§’_副本_HybridRetriever.py b/backend/app/engine/retriever/18æ—¶33分49ç§’_副本_HybridRetriever.py deleted file mode 100644 index 4bf0b8d..0000000 --- a/backend/app/engine/retriever/18æ—¶33分49ç§’_副本_HybridRetriever.py +++ /dev/null @@ -1,67 +0,0 @@ -import os -from typing import Optional, Any, Dict, List - -from llama_index.core.base.base_retriever import BaseRetriever -from llama_index.core.schema import NodeWithScore, QueryBundle - -from app.engine.retriever.CHBM25Retriever import CHBM25Retriever - - -class HybridRetriever(BaseRetriever): - def __init__( - self, - vector_index, - similarity_top_k: int = 2, - out_top_k: Optional[int] = None, - alpha: float = 0.5, - filters = None, - **kwargs: Any, - ) -> None: - super().__init__(**kwargs) - self._vector_index = vector_index - self._embed_model = vector_index._embed_model - self._out_top_k = out_top_k or similarity_top_k - self._vecRetriever = vector_index.as_retriever( - similarity_top_k=similarity_top_k,filters = filters - ) - - STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm") - if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0: - self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR) - else: - bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=self._vector_index.vector_store.get_nodes(None)) - bmRetriver.persist(STORAGE_DIR) - self._alpha = alpha - - - - def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: - vecNodes:List[NodeWithScore] = self._vecRetriever.retrieve(query_bundle.query_str) - bmNodes:List[NodeWithScore] = self._bm25Retriever.retrieve(query_bundle.query_str) - - bmDic:Dict[str,NodeWithScore] = {} - for node in bmNodes: - bmDic[node.node_id] = node - - result_tups = [] - for i in range(len(vecNodes)): - node = vecNodes[i] - bmScore = 0.0 - if node.node_id in bmDic: - bmScore = bmDic[node.node_id].score - bmDic.pop(node.node_id) - else: - bmScore = 0.0 - full_similarity = (self._alpha * node.score) + ( - (1 - self._alpha) * bmScore - ) - result_tups.append((full_similarity, node)) - - for _,node in bmDic.items(): - full_similarity = (1 - self._alpha) * node.score - result_tups.append((full_similarity, node)) - - result_tups = sorted(result_tups, key=lambda x: x[0], reverse=True) - for full_score, node in result_tups: - node.score = full_score - return [n for _, n in result_tups][:self._out_top_k] \ No newline at end of file diff --git a/backend/app/engine/tools/18æ—¶33分47ç§’_副本_duckduckgo.py b/backend/app/engine/tools/18æ—¶33分47ç§’_副本_duckduckgo.py deleted file mode 100644 index b63612a..0000000 --- a/backend/app/engine/tools/18æ—¶33分47ç§’_副本_duckduckgo.py +++ /dev/null @@ -1,36 +0,0 @@ -from llama_index.core.tools.function_tool import FunctionTool - - -def duckduckgo_search( - query: str, - region: str = "wt-wt", - max_results: int = 10, -): - """ - Use this function to search for any query in DuckDuckGo. - Args: - query (str): The query to search in DuckDuckGo. - region Optional(str): The region to be used for the search in [country-language] convention, ex us-en, uk-en, ru-ru, etc... - max_results Optional(int): The maximum number of results to be returned. Default is 10. - """ - try: - from duckduckgo_search import DDGS - except ImportError: - raise ImportError( - "duckduckgo_search package is required to use this function." - "Please install it by running: `poetry add duckduckgo_search` or `pip install duckduckgo_search`" - ) - - params = { - "keywords": query, - "region": region, - "max_results": max_results, - } - results = [] - with DDGS() as ddg: - results = list(ddg.text(**params)) - return results - - -def get_tools(**kwargs): - return [FunctionTool.from_defaults(duckduckgo_search)] diff --git a/backend/app/engine/tools/18æ—¶33分48ç§’_副本___init__.py b/backend/app/engine/tools/18æ—¶33分48ç§’_副本___init__.py deleted file mode 100644 index 1aced70..0000000 --- a/backend/app/engine/tools/18æ—¶33分48ç§’_副本___init__.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import yaml -import json -import importlib -from cachetools import cached, LRUCache -from llama_index.core.tools.tool_spec.base import BaseToolSpec -from llama_index.core.tools.function_tool import FunctionTool - - -class ToolType: - LLAMAHUB = "llamahub" - LOCAL = "local" - - -class ToolFactory: - - TOOL_SOURCE_PACKAGE_MAP = { - ToolType.LLAMAHUB: "llama_index.tools", - ToolType.LOCAL: "app.engine.tools", - } - - def load_tools(tool_type: str, tool_name: str, config: dict) -> list[FunctionTool]: - source_package = ToolFactory.TOOL_SOURCE_PACKAGE_MAP[tool_type] - try: - if "ToolSpec" in tool_name: - tool_package, tool_cls_name = tool_name.split(".") - module_name = f"{source_package}.{tool_package}" - module = importlib.import_module(module_name) - tool_class = getattr(module, tool_cls_name) - tool_spec: BaseToolSpec = tool_class(**config) - return tool_spec.to_tool_list() - else: - module = importlib.import_module(f"{source_package}.{tool_name}") - tools = module.get_tools(**config) - if not all(isinstance(tool, FunctionTool) for tool in tools): - raise ValueError( - f"The module {module} does not contain valid tools" - ) - return tools - except ImportError as e: - raise ValueError(f"Failed to import tool {tool_name}: {e}") - except AttributeError as e: - raise ValueError(f"Failed to load tool {tool_name}: {e}") - - @staticmethod - def from_env() -> list[FunctionTool]: - tools = [] - if os.path.exists("config/tools.yaml"): - with open("config/tools.yaml", "r") as f: - tool_configs = yaml.safe_load(f) - if tool_configs != None and len(tool_configs.items()) != 0: - for tool_type, config_entries in tool_configs.items(): - if config_entries == None or len(config_entries.items()) == 0: - continue - - for tool_name, config in config_entries.items(): - tools.extend( - ToolFactory.load_tools(tool_type, tool_name, config) - ) - return tools diff --git a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_img_gen.py b/backend/app/engine/tools/18æ—¶33分48ç§’_副本_img_gen.py deleted file mode 100644 index 966e95d..0000000 --- a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_img_gen.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -import uuid -import logging -import requests -from typing import Optional -from pydantic import BaseModel, Field -from llama_index.core.tools import FunctionTool - -logger = logging.getLogger(__name__) - - -class ImageGeneratorToolOutput(BaseModel): - is_success: bool = Field( - ..., - description="Whether the image generation was successful.", - ) - image_url: Optional[str] = Field( - None, - description="The URL of the generated image.", - ) - error_message: Optional[str] = Field( - None, - description="The error message if the image generation failed.", - ) - - -class ImageGeneratorTool: - _IMG_OUTPUT_FORMAT = "webp" - _IMG_OUTPUT_DIR = "output/tool" - _IMG_GEN_API = "https://api.stability.ai/v2beta/stable-image/generate/core" - - def __init__(self, api_key: str = None): - if not api_key: - api_key = os.getenv("STABILITY_API_KEY") - self._api_key = api_key - self.fileserver_url_prefix = os.getenv("FILESERVER_URL_PREFIX") - if self._api_key is None: - raise ValueError( - "STABILITY_API_KEY key is required to run image generator. Get it here: https://platform.stability.ai/account/keys" - ) - if self.fileserver_url_prefix is None: - raise ValueError("FILESERVER_URL_PREFIX is required.") - - def _prepare_output_dir(self): - """ - Create the output directory if it doesn't exist - """ - if not os.path.exists(self._IMG_OUTPUT_DIR): - os.makedirs(self._IMG_OUTPUT_DIR, exist_ok=True) - - def _save_image(self, image_data: bytes): - self._prepare_output_dir() - filename = f"{uuid.uuid4()}.{self._IMG_OUTPUT_FORMAT}" - output_path = os.path.join(self._IMG_OUTPUT_DIR, filename) - with open(output_path, "wb") as f: - f.write(image_data) - url = f"{os.getenv('FILESERVER_URL_PREFIX')}/{self._IMG_OUTPUT_DIR}/{filename}" - logger.info(f"Saved image to {output_path}.\nURL: {url}") - return url - - def _call_stability_api(self, prompt: str): - headers = { - "authorization": f"Bearer {self._api_key}", - "accept": "image/*", - } - data = { - "prompt": prompt, - "output_format": self._IMG_OUTPUT_FORMAT, - } - - response = requests.post( - self._IMG_GEN_API, - headers=headers, - files={"none": ""}, - data=data, - ) - response.raise_for_status() - - return response - - def generate_image(self, prompt: str) -> ImageGeneratorToolOutput: - """ - Use this tool to generate an image based on the prompt. - Args: - prompt (str): The prompt to generate the image from. - """ - - try: - # Call the Stability API - response = self._call_stability_api(prompt) - - # Save the image and get the URL - image_url = self._save_image(response.content) - - return ImageGeneratorToolOutput( - is_success=True, - image_url=image_url, - ) - except Exception as e: - logger.exception(e, exc_info=True) - return ImageGeneratorToolOutput( - is_success=False, - error_message=str(e), - ) - - -def get_tools(**kwargs): - return [FunctionTool.from_defaults(ImageGeneratorTool(**kwargs).generate_image)] diff --git a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_interpreter.py b/backend/app/engine/tools/18æ—¶33分48ç§’_副本_interpreter.py deleted file mode 100644 index 1d2c02c..0000000 --- a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_interpreter.py +++ /dev/null @@ -1,143 +0,0 @@ -import os -import logging -import base64 -import uuid -from pydantic import BaseModel -from typing import List, Tuple, Dict, Optional -from llama_index.core.tools import FunctionTool -from e2b_code_interpreter import CodeInterpreter -from e2b_code_interpreter.models import Logs - - -logger = logging.getLogger(__name__) - - -class InterpreterExtraResult(BaseModel): - type: str - content: Optional[str] = None - filename: Optional[str] = None - url: Optional[str] = None - - -class E2BToolOutput(BaseModel): - is_error: bool - logs: Logs - results: List[InterpreterExtraResult] = [] - - -class E2BCodeInterpreter: - - output_dir = "output/tool" - - def __init__(self, api_key: str = None): - if api_key is None: - api_key = os.getenv("E2B_API_KEY") - filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX") - if not api_key: - raise ValueError( - "E2B_API_KEY key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key" - ) - if not filesever_url_prefix: - raise ValueError( - "FILESERVER_URL_PREFIX is required to display file output from sandbox" - ) - - self.filesever_url_prefix = filesever_url_prefix - self.interpreter = CodeInterpreter(api_key=api_key) - - def __del__(self): - self.interpreter.close() - - def get_output_path(self, filename: str) -> str: - # if output directory doesn't exist, create it - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir, exist_ok=True) - return os.path.join(self.output_dir, filename) - - def save_to_disk(self, base64_data: str, ext: str) -> Dict: - filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename - buffer = base64.b64decode(base64_data) - output_path = self.get_output_path(filename) - - try: - with open(output_path, "wb") as file: - file.write(buffer) - except IOError as e: - logger.error(f"Failed to write to file {output_path}: {str(e)}") - raise e - - logger.info(f"Saved file to {output_path}") - - return { - "outputPath": output_path, - "filename": filename, - } - - def get_file_url(self, filename: str) -> str: - return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}" - - def parse_result(self, result) -> List[InterpreterExtraResult]: - """ - The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64 - We save each result to disk and return saved file metadata (extension, filename, url) - """ - if not result: - return [] - - output = [] - - try: - formats = result.formats() - results = [result[format] for format in formats] - - for ext, data in zip(formats, results): - match ext: - case "png" | "svg" | "jpeg" | "pdf": - result = self.save_to_disk(data, ext) - filename = result["filename"] - output.append( - InterpreterExtraResult( - type=ext, - filename=filename, - url=self.get_file_url(filename), - ) - ) - case _: - output.append( - InterpreterExtraResult( - type=ext, - content=data, - ) - ) - except Exception as error: - logger.exception(error, exc_info=True) - logger.error("Error when parsing output from E2b interpreter tool", error) - - return output - - def interpret(self, code: str) -> E2BToolOutput: - """ - Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error. - - Parameters: - code (str): The python code to be executed in a single cell. - """ - logger.info( - f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}" - ) - exec = self.interpreter.notebook.exec_cell(code) - - if exec.error: - logger.error("Error when executing code", exec.error) - output = E2BToolOutput(is_error=True, logs=exec.logs, results=[]) - else: - if len(exec.results) == 0: - output = E2BToolOutput(is_error=False, logs=exec.logs, results=[]) - else: - results = self.parse_result(exec.results[0]) - output = E2BToolOutput(is_error=False, logs=exec.logs, results=results) - return output - - -def get_tools(**kwargs): - return [FunctionTool.from_defaults(E2BCodeInterpreter(**kwargs).interpret)] diff --git a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_openapi_action.py b/backend/app/engine/tools/18æ—¶33分48ç§’_副本_openapi_action.py deleted file mode 100644 index c19187d..0000000 --- a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_openapi_action.py +++ /dev/null @@ -1,78 +0,0 @@ -from typing import Dict, List, Tuple -from llama_index.tools.openapi import OpenAPIToolSpec -from llama_index.tools.requests import RequestsToolSpec - - -class OpenAPIActionToolSpec(OpenAPIToolSpec, RequestsToolSpec): - """ - A combination of OpenAPI and Requests tool specs that can parse OpenAPI specs and make requests. - - openapi_uri: str: The file path or URL to the OpenAPI spec. - domain_headers: dict: Whitelist domains and the headers to use. - """ - - spec_functions = OpenAPIToolSpec.spec_functions + RequestsToolSpec.spec_functions - # Cached parsed specs by URI - _specs: Dict[str, Tuple[Dict, List[str]]] = {} - - def __init__(self, openapi_uri: str, domain_headers: dict = None, **kwargs): - if domain_headers is None: - domain_headers = {} - if openapi_uri not in self._specs: - openapi_spec, servers = self._load_openapi_spec(openapi_uri) - self._specs[openapi_uri] = (openapi_spec, servers) - else: - openapi_spec, servers = self._specs[openapi_uri] - - # Add the servers to the domain headers if they are not already present - for server in servers: - if server not in domain_headers: - domain_headers[server] = {} - - OpenAPIToolSpec.__init__(self, spec=openapi_spec) - RequestsToolSpec.__init__(self, domain_headers) - - @staticmethod - def _load_openapi_spec(uri: str) -> Tuple[Dict, List[str]]: - """ - Load an OpenAPI spec from a URI. - - Args: - uri (str): A file path or URL to the OpenAPI spec. - - Returns: - List[Document]: A list of Document objects. - """ - import yaml - from urllib.parse import urlparse - - if uri.startswith("http"): - import requests - - response = requests.get(uri) - if response.status_code != 200: - raise ValueError( - "Could not initialize OpenAPIActionToolSpec: " - f"Failed to load OpenAPI spec from {uri}, status code: {response.status_code}" - ) - spec = yaml.safe_load(response.text) - elif uri.startswith("file"): - filepath = urlparse(uri).path - with open(filepath, "r") as file: - spec = yaml.safe_load(file) - else: - raise ValueError( - "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI URI provided. " - "Only HTTP and file path are supported." - ) - # Add the servers to the whitelist - try: - servers = [ - urlparse(server["url"]).netloc for server in spec.get("servers", []) - ] - except KeyError as e: - raise ValueError( - "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI spec provided. " - "Could not get `servers` from the spec." - ) from e - return spec, servers diff --git a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_weather.py b/backend/app/engine/tools/18æ—¶33分48ç§’_副本_weather.py deleted file mode 100644 index c8b6f1b..0000000 --- a/backend/app/engine/tools/18æ—¶33分48ç§’_副本_weather.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Open Meteo weather map tool spec.""" - -import logging -import requests -import pytz -from llama_index.core.tools import FunctionTool - -logger = logging.getLogger(__name__) - - -class OpenMeteoWeather: - geo_api = "https://geocoding-api.open-meteo.com/v1" - weather_api = "https://api.open-meteo.com/v1" - - @classmethod - def _get_geo_location(cls, location: str) -> dict: - """Get geo location from location name.""" - params = {"name": location, "count": 10, "language": "en", "format": "json"} - response = requests.get(f"{cls.geo_api}/search", params=params) - if response.status_code != 200: - raise Exception(f"Failed to fetch geo location: {response.status_code}") - else: - data = response.json() - result = data["results"][0] - geo_location = { - "id": result["id"], - "name": result["name"], - "latitude": result["latitude"], - "longitude": result["longitude"], - } - return geo_location - - @classmethod - def get_weather_information(cls, location: str) -> dict: - """Use this function to get the weather of any given location. - Note that the weather code should follow WMO Weather interpretation codes (WW): - 0: Clear sky - 1, 2, 3: Mainly clear, partly cloudy, and overcast - 45, 48: Fog and depositing rime fog - 51, 53, 55: Drizzle: Light, moderate, and dense intensity - 56, 57: Freezing Drizzle: Light and dense intensity - 61, 63, 65: Rain: Slight, moderate and heavy intensity - 66, 67: Freezing Rain: Light and heavy intensity - 71, 73, 75: Snow fall: Slight, moderate, and heavy intensity - 77: Snow grains - 80, 81, 82: Rain showers: Slight, moderate, and violent - 85, 86: Snow showers slight and heavy - 95: Thunderstorm: Slight or moderate - 96, 99: Thunderstorm with slight and heavy hail - """ - logger.info( - f"Calling open-meteo api to get weather information of location: {location}" - ) - geo_location = cls._get_geo_location(location) - timezone = pytz.timezone("UTC").zone - params = { - "latitude": geo_location["latitude"], - "longitude": geo_location["longitude"], - "current": "temperature_2m,weather_code", - "hourly": "temperature_2m,weather_code", - "daily": "weather_code", - "timezone": timezone, - } - response = requests.get(f"{cls.weather_api}/forecast", params=params) - if response.status_code != 200: - raise Exception( - f"Failed to fetch weather information: {response.status_code}" - ) - return response.json() - - -def get_tools(**kwargs): - return [FunctionTool.from_defaults(OpenMeteoWeather.get_weather_information)] diff --git a/backend/app/xinference/18æ—¶33分46ç§’_副本___init__.py b/backend/app/xinference/18æ—¶33分46ç§’_副本___init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/backend/app/xinference/18æ—¶33分46ç§’_副本_base.py b/backend/app/xinference/18æ—¶33分46ç§’_副本_base.py deleted file mode 100644 index f256ec8..0000000 --- a/backend/app/xinference/18æ—¶33分46ç§’_副本_base.py +++ /dev/null @@ -1,272 +0,0 @@ -"""Xinference embeddings file.""" - -import logging -from enum import Enum -from http import HTTPStatus -from typing import Any, Dict, List, Optional, Union, Tuple - -from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher -from llama_index.core.bridge.pydantic import PrivateAttr -from llama_index.core.callbacks import CBEventType, EventPayload -from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding -from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent -from llama_index.core.postprocessor.types import BaseNodePostprocessor -from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle -from pydantic import Field - -logger = logging.getLogger(__name__) - - -EMBED_MAX_INPUT_LENGTH = 2048 -EMBED_MAX_BATCH_SIZE = 1 - - -class XinferenceEmbedding(BaseEmbedding): - """Xinference class for text embedding. - - """ - model_description: Dict[str, Any] = Field( - description="The model description from Xinference." - ) - _generator: Any = PrivateAttr() - _model_uid: str = Field(description="The Xinference model to use.") - _endpoint: str = Field(description="The Xinference endpoint URL to use.") - - def __init__( - self, - model_uid: str, - endpoint: str, - embed_batch_size: int = EMBED_MAX_BATCH_SIZE, - dimensions: Optional[int] = None, - additional_kwargs: Optional[Dict[str, Any]] = None, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - api_version: Optional[str] = None, - max_retries: int = 10, - # timeout: float = 60.0, - # reuse_client: bool = True, - # callback_manager: Optional[CallbackManager] = None, - # default_headers: Optional[Dict[str, str]] = None, - # http_client: Optional[httpx.Client] = None, - # async_http_client: Optional[httpx.AsyncClient] = None, - # num_workers: Optional[int] = None, - **kwargs: Any, - ) -> None: - generator, model_description, embed_batch_size, dimensions = self.load_model( - model_uid, endpoint - ) - self._generator = generator - #self._model_uid = model_uid - #self._endpoint = endpoint - super().__init__( - embed_batch_size=embed_batch_size, - dimensions=dimensions, - #callback_manager=callback_manager, - model_name=model_uid, - additional_kwargs=additional_kwargs, - api_key=api_key, - api_base=api_base, - api_version=api_version, - max_retries=max_retries, - # reuse_client=reuse_client, - # timeout=timeout, - # default_headers=default_headers, - # num_workers=num_workers, - **kwargs, - ) - - def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, int, dict]: - try: - from xinference.client import RESTfulClient - except ImportError: - raise ImportError( - "Could not import Xinference library." - 'Please install Xinference with `pip install "xinference[all]"`' - ) - - client = RESTfulClient(endpoint) - - try: - assert isinstance(client, RESTfulClient) - except AssertionError: - raise RuntimeError( - "Could not create RESTfulClient instance." - "Please make sure Xinference endpoint is running at the correct port." - ) - - generator = client.get_model(model_uid) - model_description = client.list_models()[model_uid] - - try: - assert generator is not None - assert model_description is not None - except AssertionError: - raise RuntimeError( - "Could not get model from endpoint." - "Please make sure Xinference endpoint is running at the correct port." - ) - - model = model_description["model_name"] - replica = model_description['replica'] - dimensions = model_description['dimensions'] - max_tokens = model_description['max_tokens'] - - return generator, model_description, replica, dimensions - - @classmethod - def class_name(cls) -> str: - return "XinferenceEmbedding" - - def _get_text_embedding(self, text: str) -> Embedding: - """ - Embed the input text synchronously. - - Subclasses should implement this method. Reference get_text_embedding's - docstring for more information. - """ - assert self._generator is not None - - response = self._generator.create_embedding(input=text) - return response['data'][0]['embedding'] - - def _get_query_embedding(self, query: str) -> Embedding: - """ - Embed the input query synchronously. - - Subclasses should implement this method. Reference get_query_embedding's - docstring for more information. - """ - return self._get_text_embedding(query) - - async def _aget_query_embedding(self, query: str) -> Embedding: - """ - Embed the input query asynchronously. - - Subclasses should implement this method. Reference get_query_embedding's - docstring for more information. - """ - return self._get_query_embedding(query) - -class XinferenceRerank(BaseNodePostprocessor): - """Xinference class for rerank. - - """ - model_description: Dict[str, Any] = Field( - description="The model description from Xinference." - ) - _generator: Any = PrivateAttr() - _model_uid: str = Field(description="The Xinference model to use.") - _endpoint: str = Field(description="The Xinference endpoint URL to use.") - model: str = Field(description="Dashscope rerank model name.") - top_n: int = Field(description="Top N nodes to return.") - threshold: float = Field(description="threshold nodes to return.") - - def __init__( - self, - model_uid: str, - endpoint: str, - top_n: int = None, - threshold: float = None, - return_documents: bool = False - ): - _model_uid = model_uid - _endpoint = endpoint - _op_n = top_n - threshold = threshold - generator, model_description = self.load_model( - model_uid, endpoint - ) - self._generator = generator - super().__init__(top_n=top_n, model=model_uid, model_uid=model_uid, threshold = threshold, return_documents=return_documents) - - @classmethod - def class_name(cls) -> str: - return "XinferenceRerank" - - def _postprocess_nodes( - self, - nodes: List[NodeWithScore], - query_bundle: Optional[QueryBundle] = None, - ) -> List[NodeWithScore]: - if query_bundle is None: - raise ValueError("Missing query bundle in extra info.") - if len(nodes) == 0: - return [] - - dispatcher.event( - ReRankStartEvent( - nodes = nodes, - top_n = self.top_n, - query = query_bundle, - model_name = self.model - ) - ) - - with self.callback_manager.event( - CBEventType.RERANKING, - payload={ - EventPayload.NODES: nodes, - EventPayload.MODEL_NAME: self._model_uid, - EventPayload.QUERY_STR: query_bundle.query_str, - EventPayload.TOP_K: self.top_n, - }, - ) as event: - texts = [node.node.get_content() for node in nodes] - response = self._generator.rerank(texts,query_bundle.query_str) - new_nodes = [] - for result in response['results']: - new_node_with_score = NodeWithScore( - node=nodes[result['index']].node, score=result['relevance_score'] - ) - if self.threshold is not None: - if new_node_with_score.score >=self.threshold: - new_nodes.append(new_node_with_score) - - if self.top_n is not None: - if len(new_nodes) > self.top_n: - for index in new_nodes[self.top_n:-1]: - new_nodes.remove(index) - - event.on_end(payload={EventPayload.NODES: new_nodes}) - - dispatcher.event( - ReRankEndEvent( - nodes= new_nodes - ) - ) - return new_nodes - - def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, int, dict]: - try: - from xinference.client import RESTfulClient - except ImportError: - raise ImportError( - "Could not import Xinference library." - 'Please install Xinference with `pip install "xinference[all]"`' - ) - - client = RESTfulClient(endpoint) - - try: - assert isinstance(client, RESTfulClient) - except AssertionError: - raise RuntimeError( - "Could not create RESTfulClient instance." - "Please make sure Xinference endpoint is running at the correct port." - ) - - generator = client.get_model(model_uid) - model_description = client.list_models()[model_uid] - - try: - assert generator is not None - assert model_description is not None - except AssertionError: - raise RuntimeError( - "Could not get model from endpoint." - "Please make sure Xinference endpoint is running at the correct port." - ) - - model = model_description["model_name"] - - return generator, model_description \ No newline at end of file